Search in sources :

Example 1 with Referenceable

Use of org.apache.atlas.typesystem.Referenceable in project incubator-atlas by apache.

In the class HiveHook, method createOrUpdateEntities:

/**
 * Creates the Atlas entities for the database -- and, when applicable, the table -- behind a Hive
 * entity and queues them on the event as one {@code EntityUpdateRequest}.
 *
 * <p>DATABASE, TABLE and PARTITION entities are handled; any other type is logged and yields an
 * empty result. The database is always re-read through the Hive client before it is converted, and
 * nothing is created or queued when that lookup returns {@code null}.
 *
 * @param dgiBridge      bridge used to reach the Hive client and to build the Atlas instances
 * @param event          hook event that receives the update notification
 * @param entity         Hive entity being processed
 * @param skipTempTables when {@code true}, temporary tables that are not external are not registered
 * @param existTable     table to use as-is instead of re-reading it from Hive; may be {@code null}
 * @return insertion-ordered map holding the database entity under {@code Type.DATABASE} and, if one
 *         was created, the table entity under {@code Type.TABLE}; empty when no database was resolved
 * @throws AtlasHookException wrapping any exception raised while building the entities
 */
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, boolean skipTempTables, Table existTable) throws AtlasHookException {
    try {
        final LinkedHashMap<Type, Referenceable> entitiesByType = new LinkedHashMap<>();
        Database database = null;
        Table hiveTable = null;

        // Work out which database (and table, if any) the entity refers to.
        switch (entity.getType()) {
            case DATABASE:
                database = entity.getDatabase();
                break;
            case TABLE:
                hiveTable = entity.getTable();
                database = dgiBridge.hiveClient.getDatabase(hiveTable.getDbName());
                break;
            case PARTITION:
                hiveTable = entity.getPartition().getTable();
                database = dgiBridge.hiveClient.getDatabase(hiveTable.getDbName());
                break;
            default:
                LOG.info("{}: entity-type not handled by Atlas hook. Ignored", entity.getType());
        }

        if (database == null) {
            return entitiesByType;
        }

        // Re-read the database by name from the Hive client; it may no longer be there.
        database = dgiBridge.hiveClient.getDatabase(database.getName());
        if (database == null) {
            return entitiesByType;
        }

        final List<Referenceable> toNotify = new ArrayList<>();
        final Referenceable databaseEntity = dgiBridge.createDBInstance(database);
        toNotify.add(databaseEntity);
        entitiesByType.put(Type.DATABASE, databaseEntity);

        if (hiveTable != null) {
            // A caller-supplied table wins over a fresh lookup.
            hiveTable = (existTable != null)
                    ? existTable
                    : dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());

            final boolean skipRegistration = skipTempTables
                    && hiveTable.isTemporary()
                    && !TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType());

            if (!skipRegistration) {
                // External temporary tables are still registered: per the original note, their
                // HDFS path is needed for temp table lineage.
                final Referenceable tableEntity = dgiBridge.createTableInstance(databaseEntity, hiveTable);
                toNotify.add(tableEntity);
                entitiesByType.put(Type.TABLE, tableEntity);
            } else {
                LOG.debug("Skipping temporary table registration {} since it is not an external table {} ", hiveTable.getTableName(), hiveTable.getTableType().name());
            }
        }

        event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), toNotify));
        return entitiesByType;
    } catch (Exception e) {
        throw new AtlasHookException("HiveHook.createOrUpdateEntities() failed.", e);
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) AtlasHookException(org.apache.atlas.hook.AtlasHookException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MalformedURLException(java.net.MalformedURLException) AtlasHookException(org.apache.atlas.hook.AtlasHookException) LinkedHashMap(java.util.LinkedHashMap) Type(org.apache.hadoop.hive.ql.hooks.Entity.Type) TableType(org.apache.hadoop.hive.metastore.TableType) HookNotification(org.apache.atlas.notification.hook.HookNotification) Referenceable(org.apache.atlas.typesystem.Referenceable) Database(org.apache.hadoop.hive.metastore.api.Database)

Example 2 with Referenceable

Use of org.apache.atlas.typesystem.Referenceable in project incubator-atlas by apache.

In the class HiveHook, method renameTable:

/**
 * Handles a Hive table rename by emitting notifications that move the table, its columns, its
 * partition-key columns and its storage descriptor from the old qualified name to the new one.
 *
 * <p>The single input of the event is taken as the old table. Every TABLE output whose database or
 * table name differs from the old table is treated as the rename target.
 *
 * <p>Outputs for which no table entity was produced are skipped instead of failing with a
 * {@code NullPointerException}. NOTE(review): this assumes the four-argument
 * {@code createOrUpdateEntities} overload (not visible here) can omit the {@code Type.TABLE} entry,
 * as the five-argument variant does for temporary, non-external tables -- confirm.
 *
 * @param dgiBridge bridge providing the cluster name used to build qualified names
 * @param event     hook event carrying the rename inputs/outputs and receiving the notifications
 * @throws AtlasHookException wrapping any exception raised while processing the rename
 */
private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
    try {
        // There is no easy way of getting the new name, so it is derived from the event outputs.
        assert event.getInputs() != null && event.getInputs().size() == 1;
        assert event.getOutputs() != null && event.getOutputs().size() > 0;
        // Update the entity if it does not exist
        ReadEntity oldEntity = event.getInputs().iterator().next();
        Table oldTable = oldEntity.getTable();
        for (WriteEntity writeEntity : event.getOutputs()) {
            if (writeEntity.getType() != Entity.Type.TABLE) {
                continue;
            }
            Table newTable = writeEntity.getTable();
            // Hive sends both the old and the new table names in the outputs, so the entry that
            // still carries the old name is skipped.
            boolean sameAsOldTable = newTable.getDbName().equals(oldTable.getDbName())
                    && newTable.getTableName().equals(oldTable.getTableName());
            if (sameAsOldTable) {
                continue;
            }
            final String oldQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), oldTable);
            final String newQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);
            // Create/update the old table entity: create it with the old qualified name and old
            // table name if it doesn't exist; if it exists, it will be updated.
            // The new (output) entity is always used to build the table, since some flags and
            // attributes are not set on the input entity and Hive.getTable(oldTableName) fails
            // because the table no longer exists in Hive.
            final LinkedHashMap<Type, Referenceable> tables = createOrUpdateEntities(dgiBridge, event, writeEntity, true);
            Referenceable tableEntity = tables.get(Type.TABLE);
            if (tableEntity == null) {
                // No table entity was registered for this output, so there is nothing to rename.
                // Previously this dereferenced null and failed the whole hook invocation.
                continue;
            }
            // Reset regular column qualified names to the old name and create partial notification
            // requests replacing the old column qualified names with the new ones, to retain any
            // existing traits.
            @SuppressWarnings("unchecked") // the COLUMNS attribute is read back as a list of column entities
            final List<Referenceable> columns = (List<Referenceable>) tableEntity.get(HiveMetaStoreBridge.COLUMNS);
            replaceColumnQFName(event, columns, oldQualifiedName, newQualifiedName);
            // Same for the partition-key columns.
            @SuppressWarnings("unchecked") // the PART_COLS attribute is read back as a list of column entities
            final List<Referenceable> partitionKeys = (List<Referenceable>) tableEntity.get(HiveMetaStoreBridge.PART_COLS);
            replaceColumnQFName(event, partitionKeys, oldQualifiedName, newQualifiedName);
            // Reset the storage descriptor qualified name to the old name and create a partial
            // notification request replacing it with the new name, to retain any existing traits.
            replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName);
            // Reset the table qualified name to the old name and create a partial notification
            // request replacing it with the new name.
            replaceTableQFName(event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName);
        }
    } catch (Exception e) {
        throw new AtlasHookException("HiveHook.renameTable() failed.", e);
    }
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Type(org.apache.hadoop.hive.ql.hooks.Entity.Type) TableType(org.apache.hadoop.hive.metastore.TableType) Table(org.apache.hadoop.hive.ql.metadata.Table) Referenceable(org.apache.atlas.typesystem.Referenceable) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) AtlasHookException(org.apache.atlas.hook.AtlasHookException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MalformedURLException(java.net.MalformedURLException) AtlasHookException(org.apache.atlas.hook.AtlasHookException)

Example 3 with Referenceable

Use of org.apache.atlas.typesystem.Referenceable in project incubator-atlas by apache.

In the class HiveHook, method processHiveEntity:

/**
 * Registers the data set behind a Hive entity, at most once per table qualified name or DFS path.
 *
 * <p>TABLE and PARTITION entities are keyed by their table's qualified name and converted through
 * {@code createOrUpdateEntities} (temporary tables are not skipped). DFS_DIR entities with a
 * non-null location are keyed by the lower-cased path and converted through
 * {@code fillHDFSDataSet}. Other entity types are ignored.
 *
 * @param dgiBridge         bridge used to build qualified names and Atlas instances
 * @param event             hook event passed on when table entities are created
 * @param entity            Hive entity to process
 * @param dataSetsProcessed qualified names / paths already handled; updated by this call
 * @param dataSets          receives the entity mapped to its table or HDFS path instance
 * @param entities          receives every Atlas instance created by this call
 * @throws AtlasHookException wrapping any exception raised while processing the entity
 */
private <T extends Entity> void processHiveEntity(HiveMetaStoreBridge dgiBridge, HiveEventContext event, T entity, Set<String> dataSetsProcessed, SortedMap<T, Referenceable> dataSets, Set<Referenceable> entities) throws AtlasHookException {
    try {
        final Type entityType = entity.getType();

        if (entityType == Type.TABLE || entityType == Type.PARTITION) {
            final String tableKey = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable());
            if (dataSetsProcessed.contains(tableKey)) {
                return;
            }
            final LinkedHashMap<Type, Referenceable> created = createOrUpdateEntities(dgiBridge, event, entity, false);
            dataSets.put(entity, created.get(Type.TABLE));
            dataSetsProcessed.add(tableKey);
            entities.addAll(created.values());
            return;
        }

        if (entityType != Type.DFS_DIR) {
            return;
        }

        final URI dirLocation = entity.getLocation();
        if (dirLocation == null) {
            return;
        }

        final Path dirPath = new Path(dirLocation);
        final String pathKey = lower(dirPath.toString());
        LOG.debug("Registering DFS Path {} ", pathKey);
        if (dataSetsProcessed.contains(pathKey)) {
            return;
        }

        final Referenceable hdfsDataSet = dgiBridge.fillHDFSDataSet(pathKey);
        dataSets.put(entity, hdfsDataSet);
        dataSetsProcessed.add(pathKey);
        entities.add(hdfsDataSet);
    } catch (Exception e) {
        throw new AtlasHookException("HiveHook.processHiveEntity() failed.", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Type(org.apache.hadoop.hive.ql.hooks.Entity.Type) TableType(org.apache.hadoop.hive.metastore.TableType) Referenceable(org.apache.atlas.typesystem.Referenceable) URI(java.net.URI) AtlasHookException(org.apache.atlas.hook.AtlasHookException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MalformedURLException(java.net.MalformedURLException) AtlasHookException(org.apache.atlas.hook.AtlasHookException)

Example 4 with Referenceable

Use of org.apache.atlas.typesystem.Referenceable in project incubator-atlas by apache.

In the class HiveHook, method renameColumn:

/**
 * Handles a Hive column rename.
 *
 * <p>The old column list comes from the event's single input table; the new list is read from the
 * Hive client for the first output table. {@code findChangedColNames} supplies the old/new column
 * name pair. For every TABLE output the table entities are created or updated (using the old table
 * as the existing table) and a partial update request is queued that replaces the column's old
 * qualified name with the new one. Finally the TABLE outputs are handed to
 * {@code handleEventOutputs}.
 *
 * @param dgiBridge bridge providing the Hive client and cluster name
 * @param event     hook event carrying the inputs/outputs and receiving the notifications
 * @throws AtlasHookException wrapping any exception raised while processing the rename
 */
private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
    try {
        assert event.getInputs() != null && event.getInputs().size() == 1;
        assert event.getOutputs() != null && event.getOutputs().size() > 0;

        final Table tableBeforeRename = event.getInputs().iterator().next().getTable();
        final List<FieldSchema> columnsBefore = tableBeforeRename.getAllCols();

        // Read the output table back from Hive to see its current columns.
        final Table firstOutput = event.getOutputs().iterator().next().getTable();
        final Table tableAfterRename = dgiBridge.hiveClient.getTable(firstOutput.getDbName(), firstOutput.getTableName());
        final List<FieldSchema> columnsAfter = tableAfterRename.getAllCols();
        assert columnsBefore.size() == columnsAfter.size();

        final Pair<String, String> renamedColumn = findChangedColNames(columnsBefore, columnsAfter);
        final String previousName = renamedColumn.getLeft();
        final String currentName = renamedColumn.getRight();

        for (final WriteEntity output : event.getOutputs()) {
            if (output.getType() != Type.TABLE) {
                continue;
            }
            final Table targetTable = output.getTable();
            createOrUpdateEntities(dgiBridge, event, output, true, tableBeforeRename);

            // Both column qualified names are built on the output table's qualified name.
            final String tableQFName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), targetTable);
            final String previousColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(tableQFName, previousName);
            final String currentColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(tableQFName, currentName);

            final String columnTypeName = HiveDataTypes.HIVE_COLUMN.getName();
            final Referenceable columnUpdate = new Referenceable(columnTypeName);
            columnUpdate.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, currentColumnQFName);

            final HookNotification.EntityPartialUpdateRequest renameRequest = new HookNotification.EntityPartialUpdateRequest(
                    event.getUser(), columnTypeName, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, previousColumnQFName, columnUpdate);
            event.addMessage(renameRequest);
        }

        handleEventOutputs(dgiBridge, event, Type.TABLE);
    } catch (Exception e) {
        throw new AtlasHookException("HiveHook.renameColumn() failed.", e);
    }
}
Also used : Table(org.apache.hadoop.hive.ql.metadata.Table) HookNotification(org.apache.atlas.notification.hook.HookNotification) Referenceable(org.apache.atlas.typesystem.Referenceable) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) AtlasHookException(org.apache.atlas.hook.AtlasHookException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MalformedURLException(java.net.MalformedURLException) AtlasHookException(org.apache.atlas.hook.AtlasHookException)

Example 5 with Referenceable

Use of org.apache.atlas.typesystem.Referenceable in project incubator-atlas by apache.

In the class HiveHook, method replaceColumnQFName:

/**
 * Moves each column from the old table's qualified-name space to the new one.
 *
 * <p>For every column, the passed-in entity has its qualified name reset to the value derived from
 * {@code oldTableQFName}, and a partial update request is queued on the event that replaces that
 * old qualified name with the one derived from {@code newTableQFName}.
 *
 * @param event          hook event that receives one partial update request per column
 * @param cols           column entities to process; each one is modified in place
 * @param oldTableQFName qualified name of the table before the rename
 * @param newTableQFName qualified name of the table after the rename
 * @return the newly created column entities, each carrying only the new qualified name
 */
private List<Referenceable> replaceColumnQFName(final HiveEventContext event, final List<Referenceable> cols, final String oldTableQFName, final String newTableQFName) {
    final List<Referenceable> renamedColumns = new ArrayList<>();

    for (final Referenceable column : cols) {
        final String columnName = (String) column.get(AtlasClient.NAME);
        final String previousQFName = HiveMetaStoreBridge.getColumnQualifiedName(oldTableQFName, columnName);
        final String updatedQFName = HiveMetaStoreBridge.getColumnQualifiedName(newTableQFName, columnName);

        // The existing column entity is pointed back at its old qualified name.
        column.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, previousQFName);

        // The replacement entity carries nothing but the new qualified name.
        final String columnTypeName = HiveDataTypes.HIVE_COLUMN.getName();
        final Referenceable renamedColumn = new Referenceable(columnTypeName);
        renamedColumn.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, updatedQFName);

        final HookNotification.EntityPartialUpdateRequest renameRequest = new HookNotification.EntityPartialUpdateRequest(
                event.getUser(), columnTypeName, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, previousQFName, renamedColumn);
        event.addMessage(renameRequest);

        renamedColumns.add(renamedColumn);
    }

    return renamedColumns;
}
Also used : HookNotification(org.apache.atlas.notification.hook.HookNotification) Referenceable(org.apache.atlas.typesystem.Referenceable) ArrayList(java.util.ArrayList)

Aggregations

Referenceable (org.apache.atlas.typesystem.Referenceable)235 Test (org.testng.annotations.Test)114 Id (org.apache.atlas.typesystem.persistence.Id)50 ArrayList (java.util.ArrayList)45 List (java.util.List)25 Struct (org.apache.atlas.typesystem.Struct)25 HashMap (java.util.HashMap)24 BeforeTest (org.testng.annotations.BeforeTest)24 ITypedReferenceableInstance (org.apache.atlas.typesystem.ITypedReferenceableInstance)22 AfterTest (org.testng.annotations.AfterTest)22 HookNotification (org.apache.atlas.notification.hook.HookNotification)20 IStruct (org.apache.atlas.typesystem.IStruct)18 ClassType (org.apache.atlas.typesystem.types.ClassType)16 JSONObject (org.codehaus.jettison.json.JSONObject)16 ImmutableList (com.google.common.collect.ImmutableList)15 AtlasServiceException (org.apache.atlas.AtlasServiceException)14 TraitType (org.apache.atlas.typesystem.types.TraitType)12 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)12 Date (java.util.Date)11 AtlasException (org.apache.atlas.AtlasException)11