Search in sources :

Example 1 with Feed

use of org.apache.falcon.entity.v0.feed.Feed in project atlas by apache.

the class FalconBridge method createProcessEntity.

/**
 * Builds the Atlas entities describing a Falcon process.
 *
 * <p>For every cluster the process is configured on, the returned list receives the cluster
 * reference, one dataset reference per input feed and per output feed, and - only when at
 * least one input or output exists - a process entity whose qualified name is derived from
 * the process name and the cluster name.
 *
 * @param process process entity
 * @param falconStore config store used to look up the referenced clusters and feeds
 * @return the created references in creation order; empty when the process has no clusters
 *
 * @throws FalconException if retrieving from the configuration store fails
 */
public static List<Referenceable> createProcessEntity(org.apache.falcon.entity.v0.process.Process process, ConfigurationStore falconStore) throws FalconException {
    LOG.info("Creating process Entity : {}", process.getName());
    List<Referenceable> entities = new ArrayList<>();
    if (process.getClusters() == null) {
        return entities;
    }

    // The requirement is for each cluster, create a process entity with name
    // clustername.processname
    for (Cluster processCluster : process.getClusters().getClusters()) {
        org.apache.falcon.entity.v0.cluster.Cluster clusterEntity = falconStore.get(EntityType.CLUSTER, processCluster.getName());
        Referenceable clusterRef = getClusterEntityReference(clusterEntity.getName(), clusterEntity.getColo());
        entities.add(clusterRef);

        // Input feeds: each dataset reference goes into the result and into the process inputs.
        List<Referenceable> inputRefs = new ArrayList<>();
        if (process.getInputs() != null) {
            for (Input input : process.getInputs().getInputs()) {
                Feed inputFeed = falconStore.get(EntityType.FEED, input.getFeed());
                Referenceable inputRef = getFeedDataSetReference(inputFeed, clusterRef);
                entities.add(inputRef);
                inputRefs.add(inputRef);
            }
        }

        // Output feeds: same treatment as the inputs.
        List<Referenceable> outputRefs = new ArrayList<>();
        if (process.getOutputs() != null) {
            for (Output output : process.getOutputs().getOutputs()) {
                Feed outputFeed = falconStore.get(EntityType.FEED, output.getFeed());
                Referenceable outputRef = getFeedDataSetReference(outputFeed, clusterRef);
                entities.add(outputRef);
                outputRefs.add(outputRef);
            }
        }

        boolean hasInputs = !inputRefs.isEmpty();
        boolean hasOutputs = !outputRefs.isEmpty();
        if (!hasInputs && !hasOutputs) {
            // No feeds on this cluster: no process entity is emitted for it.
            continue;
        }

        Referenceable processRef = new Referenceable(FalconDataTypes.FALCON_PROCESS.getName());
        processRef.set(AtlasClient.NAME, process.getName());
        processRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getProcessQualifiedName(process.getName(), clusterEntity.getName()));
        processRef.set(FalconBridge.FREQUENCY, process.getFrequency().toString());
        if (hasInputs) {
            processRef.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, inputRefs);
        }
        if (hasOutputs) {
            processRef.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, outputRefs);
        }
        // Cluster the process runs on
        processRef.set(FalconBridge.RUNSON, clusterRef);
        // Owner comes from the ACL, when one is defined
        if (process.getACL() != null) {
            processRef.set(AtlasClient.OWNER, process.getACL().getOwner());
        }
        if (StringUtils.isNotEmpty(process.getTags())) {
            processRef.set(FalconBridge.TAGS, EventUtil.convertKeyValueStringToMap(process.getTags()));
        }
        if (process.getPipelines() != null) {
            processRef.set(FalconBridge.PIPELINES, process.getPipelines());
        }
        processRef.set(FalconBridge.WFPROPERTIES, getProcessEntityWFProperties(process.getWorkflow(), process.getName()));
        entities.add(processRef);
    }
    return entities;
}
Also used : Input(org.apache.falcon.entity.v0.process.Input) Referenceable(org.apache.atlas.v1.model.instance.Referenceable) Output(org.apache.falcon.entity.v0.process.Output) ArrayList(java.util.ArrayList) Cluster(org.apache.falcon.entity.v0.process.Cluster) Feed(org.apache.falcon.entity.v0.feed.Feed)

Example 2 with Feed

use of org.apache.falcon.entity.v0.feed.Feed in project atlas by apache.

the class FalconHookIT method getTableFeed.

/**
 * Loads a feed from the given resource, points its first cluster (and, when requested, its
 * second cluster) at a freshly named table, publishes the feed and verifies registration,
 * attributes and lineage for every configured cluster.
 *
 * @param feedResource resource the feed definition is loaded from
 * @param clusterName name assigned to the feed's first cluster
 * @param secondClusterName name assigned to the feed's second cluster, or {@code null} to
 *        leave the second cluster untouched and unverified
 * @return the published feed
 * @throws Exception if loading, publishing or any verification step fails
 */
private Feed getTableFeed(String feedResource, String clusterName, String secondClusterName) throws Exception {
    final boolean hasSecondCluster = secondClusterName != null;

    Feed tableFeed = loadEntity(EntityType.FEED, feedResource, "feed" + random());

    // First cluster always gets its own database/table.
    org.apache.falcon.entity.v0.feed.Cluster firstCluster = tableFeed.getClusters().getClusters().get(0);
    firstCluster.setName(clusterName);
    String firstDb = "db" + random();
    String firstTable = "table" + random();
    firstCluster.getTable().setUri(getTableUri(firstDb, firstTable));

    // Names for the second cluster are generated unconditionally; they are only used
    // when a second cluster name was supplied.
    String secondDb = "db" + random();
    String secondTable = "table" + random();
    if (hasSecondCluster) {
        org.apache.falcon.entity.v0.feed.Cluster secondCluster = tableFeed.getClusters().getClusters().get(1);
        secondCluster.setName(secondClusterName);
        secondCluster.getTable().setUri(getTableUri(secondDb, secondTable));
    }

    STORE.publish(EntityType.FEED, tableFeed);

    String firstFeedId = assertFeedIsRegistered(tableFeed, clusterName);
    assertFeedAttributes(firstFeedId);
    verifyFeedLineage(tableFeed.getName(), clusterName, firstFeedId, firstDb, firstTable);

    if (hasSecondCluster) {
        String secondFeedId = assertFeedIsRegistered(tableFeed, secondClusterName);
        assertFeedAttributes(secondFeedId);
        verifyFeedLineage(tableFeed.getName(), secondClusterName, secondFeedId, secondDb, secondTable);
    }
    return tableFeed;
}
Also used : Feed(org.apache.falcon.entity.v0.feed.Feed)

Example 3 with Feed

use of org.apache.falcon.entity.v0.feed.Feed in project atlas by apache.

the class FalconHookIT method testCreateProcess.

/**
 * Publishes a cluster, an input feed, an output feed and a process wired to them, then
 * checks that the registered process entity carries the process name and references the
 * two feeds as its first input and first output.
 */
@Test
public void testCreateProcess() throws Exception {
    // Cluster shared by both feeds and the process.
    Cluster cluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random());
    STORE.publish(EntityType.CLUSTER, cluster);
    assertClusterIsRegistered(cluster);
    final String clusterName = cluster.getName();
    final String feedTypeName = FalconDataTypes.FALCON_FEED.getName();

    // Input feed and its Atlas id.
    Feed infeed = getTableFeed(FEED_RESOURCE, clusterName, null);
    String infeedQualifiedName = FalconBridge.getFeedQualifiedName(infeed.getName(), clusterName);
    String infeedId = atlasClient.getEntity(feedTypeName, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, infeedQualifiedName).getId()._getId();

    // Output feed and its Atlas id.
    Feed outfeed = getTableFeed(FEED_RESOURCE, clusterName);
    String outfeedQualifiedName = FalconBridge.getFeedQualifiedName(outfeed.getName(), clusterName);
    String outFeedId = atlasClient.getEntity(feedTypeName, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, outfeedQualifiedName).getId()._getId();

    // Process bound to the cluster and the two feeds.
    Process process = loadEntity(EntityType.PROCESS, PROCESS_RESOURCE, "process" + random());
    process.getClusters().getClusters().get(0).setName(clusterName);
    process.getInputs().getInputs().get(0).setFeed(infeed.getName());
    process.getOutputs().getOutputs().get(0).setFeed(outfeed.getName());
    STORE.publish(EntityType.PROCESS, process);

    String processId = assertProcessIsRegistered(process, clusterName);
    Referenceable registeredProcess = atlasClient.getEntity(processId);
    assertNotNull(registeredProcess);
    assertEquals(registeredProcess.get(AtlasClient.NAME), process.getName());

    List<Id> inputIds = (List<Id>) registeredProcess.get("inputs");
    assertEquals(inputIds.get(0)._getId(), infeedId);
    List<Id> outputIds = (List<Id>) registeredProcess.get("outputs");
    assertEquals(outputIds.get(0)._getId(), outFeedId);
}
Also used : Referenceable(org.apache.atlas.v1.model.instance.Referenceable) Cluster(org.apache.falcon.entity.v0.cluster.Cluster) Process(org.apache.falcon.entity.v0.process.Process) List(java.util.List) Feed(org.apache.falcon.entity.v0.feed.Feed) Test(org.testng.annotations.Test)

Example 4 with Feed

use of org.apache.falcon.entity.v0.feed.Feed in project incubator-atlas by apache.

the class FalconHookIT method getHDFSFeed.

/**
 * Loads a feed from the given resource, binds its first cluster to {@code clusterName},
 * publishes it and verifies the resulting Atlas entities: the feed itself, the feed-creation
 * process whose first output is the feed, and that process's first input, which must be an
 * HDFS path entity whose qualified name equals the normalized data location of the feed.
 *
 * @param feedResource resource the feed definition is loaded from
 * @param clusterName name assigned to the feed's first cluster
 * @return pair of the registered feed id and the published feed
 * @throws Exception if loading, publishing or any verification step fails
 */
private TypeUtils.Pair<String, Feed> getHDFSFeed(String feedResource, String clusterName) throws Exception {
    Feed hdfsFeed = loadEntity(EntityType.FEED, feedResource, "feed" + random());
    org.apache.falcon.entity.v0.feed.Cluster feedCluster = hdfsFeed.getClusters().getClusters().get(0);
    feedCluster.setName(clusterName);
    STORE.publish(EntityType.FEED, hdfsFeed);

    String feedId = assertFeedIsRegistered(hdfsFeed, clusterName);
    assertFeedAttributes(feedId);

    // The feed-creation process is registered under the feed's qualified name.
    String creationProcessId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_CREATION.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, FalconBridge.getFeedQualifiedName(hdfsFeed.getName(), clusterName));
    Referenceable creationProcess = atlasClient.getEntity(creationProcessId);

    List<Id> outputIds = (List<Id>) creationProcess.get("outputs");
    assertEquals(outputIds.get(0).getId()._getId(), feedId);

    List<Id> inputIds = (List<Id>) creationProcess.get("inputs");
    String inputId = inputIds.get(0).getId()._getId();
    Referenceable pathEntity = atlasClient.getEntity(inputId);
    assertEquals(pathEntity.getTypeName(), HiveMetaStoreBridge.HDFS_PATH);

    // The path entity must point at the feed's DATA location on this cluster.
    Location dataLocation = FileSystemStorage.getLocation(FeedHelper.getLocations(feedCluster, hdfsFeed), LocationType.DATA);
    assertEquals(pathEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), FalconBridge.normalize(dataLocation.getPath()));

    return TypeUtils.Pair.of(feedId, hdfsFeed);
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) List(java.util.List) Id(org.apache.atlas.typesystem.persistence.Id) Feed(org.apache.falcon.entity.v0.feed.Feed) Location(org.apache.falcon.entity.v0.feed.Location)

Example 5 with Feed

use of org.apache.falcon.entity.v0.feed.Feed in project incubator-atlas by apache.

the class FalconHookIT method testCreateProcess.

/**
 * End-to-end check of process registration: a cluster, an input feed and an output feed are
 * published first, then a process referencing them; the registered process entity must
 * expose the process name and list the two feeds as its first input and first output.
 */
@Test
public void testCreateProcess() throws Exception {
    Cluster cluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random());
    STORE.publish(EntityType.CLUSTER, cluster);
    assertClusterIsRegistered(cluster);
    final String clusterName = cluster.getName();

    // Publish the input feed and resolve the id Atlas assigned to it.
    Feed infeed = getTableFeed(FEED_RESOURCE, clusterName, null);
    Referenceable infeedEntity = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, FalconBridge.getFeedQualifiedName(infeed.getName(), clusterName));
    String infeedId = infeedEntity.getId()._getId();

    // Publish the output feed and resolve the id Atlas assigned to it.
    Feed outfeed = getTableFeed(FEED_RESOURCE, clusterName);
    Referenceable outfeedEntity = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, FalconBridge.getFeedQualifiedName(outfeed.getName(), clusterName));
    String outFeedId = outfeedEntity.getId()._getId();

    // Wire the process to the cluster and both feeds before publishing it.
    Process process = loadEntity(EntityType.PROCESS, PROCESS_RESOURCE, "process" + random());
    process.getClusters().getClusters().get(0).setName(clusterName);
    process.getInputs().getInputs().get(0).setFeed(infeed.getName());
    process.getOutputs().getOutputs().get(0).setFeed(outfeed.getName());
    STORE.publish(EntityType.PROCESS, process);

    String pid = assertProcessIsRegistered(process, clusterName);
    Referenceable processEntity = atlasClient.getEntity(pid);
    assertNotNull(processEntity);
    assertEquals(processEntity.get(AtlasClient.NAME), process.getName());

    List<Id> processInputs = (List<Id>) processEntity.get("inputs");
    assertEquals(processInputs.get(0)._getId(), infeedId);
    List<Id> processOutputs = (List<Id>) processEntity.get("outputs");
    assertEquals(processOutputs.get(0)._getId(), outFeedId);
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) Cluster(org.apache.falcon.entity.v0.cluster.Cluster) Process(org.apache.falcon.entity.v0.process.Process) List(java.util.List) Feed(org.apache.falcon.entity.v0.feed.Feed) Test(org.testng.annotations.Test)

Aggregations

Feed (org.apache.falcon.entity.v0.feed.Feed): 12, List (java.util.List): 8, Cluster (org.apache.falcon.entity.v0.cluster.Cluster): 6, Test (org.testng.annotations.Test): 6, Referenceable (org.apache.atlas.typesystem.Referenceable): 5, Referenceable (org.apache.atlas.v1.model.instance.Referenceable): 5, Location (org.apache.falcon.entity.v0.feed.Location): 4, Process (org.apache.falcon.entity.v0.process.Process): 4, ArrayList (java.util.ArrayList): 2, CatalogStorage (org.apache.falcon.entity.CatalogStorage): 2, CatalogTable (org.apache.falcon.entity.v0.feed.CatalogTable): 2, Cluster (org.apache.falcon.entity.v0.process.Cluster): 2, Input (org.apache.falcon.entity.v0.process.Input): 2, Output (org.apache.falcon.entity.v0.process.Output): 2, Id (org.apache.atlas.typesystem.persistence.Id): 1, TypeUtils (org.apache.atlas.typesystem.types.TypeUtils): 1, Id (org.apache.atlas.v1.model.instance.Id): 1, TypesUtil (org.apache.atlas.v1.typesystem.types.utils.TypesUtil): 1