Use of org.apache.atlas.v1.model.instance.Referenceable in project atlas by apache.
From class StormAtlasHook, method createTopologyInstance:
private Referenceable createTopologyInstance(TopologyInfo topologyInfo, Map stormConf) {
    Referenceable topologyReferenceable = new Referenceable(StormDataTypes.STORM_TOPOLOGY.getName());

    topologyReferenceable.set("id", topologyInfo.get_id());
    topologyReferenceable.set(AtlasClient.NAME, topologyInfo.get_name());
    topologyReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, topologyInfo.get_name());

    String owner = topologyInfo.get_owner();
    if (StringUtils.isEmpty(owner)) {
        owner = ANONYMOUS_OWNER;
    }
    topologyReferenceable.set(AtlasClient.OWNER, owner);
    topologyReferenceable.set("startTime", new Date(System.currentTimeMillis()));
    topologyReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getClusterName(stormConf));

    return topologyReferenceable;
}
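For reference, here is a minimal, self-contained sketch of the entity this method builds. The string literals stand in for StormDataTypes.STORM_TOPOLOGY.getName(), the AtlasClient constants, and the ANONYMOUS_OWNER fallback, and the sketch assumes the v1 Referenceable inherits a getValues() accessor from Struct (accessor names may vary across Atlas releases):

import java.util.Date;
import java.util.Map;

import org.apache.atlas.v1.model.instance.Referenceable;

public class TopologyEntitySketch {
    public static void main(String[] args) {
        // Mirrors the attribute layout of createTopologyInstance with literal
        // stand-ins for the StormDataTypes/AtlasClient constants.
        Referenceable topology = new Referenceable("storm_topology");
        topology.set("id", "wordcount-1-1500000000");
        topology.set("name", "wordcount");
        topology.set("qualifiedName", "wordcount");
        topology.set("owner", "anonymous"); // stand-in for the ANONYMOUS_OWNER fallback
        topology.set("startTime", new Date());

        // Dump the raw attribute map to see what would be sent to Atlas.
        for (Map.Entry<String, Object> e : topology.getValues().entrySet()) {
            System.out.println(e.getKey() + " = " + e.getValue());
        }
    }
}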
Use of org.apache.atlas.v1.model.instance.Referenceable in project atlas by apache.
From class StormAtlasHook, method addTopologyOutputs:
private void addTopologyOutputs(Referenceable topologyReferenceable, StormTopology stormTopology, String topologyOwner, Map stormConf, List<Referenceable> dependentEntities) {
    final ArrayList<Referenceable> outputDataSets = new ArrayList<>();

    Map<String, Bolt> bolts             = stormTopology.get_bolts();
    Set<String>       terminalBoltNames = StormTopologyUtil.getTerminalUserBoltNames(stormTopology);

    for (String terminalBoltName : terminalBoltNames) {
        Serializable instance = Utils.javaDeserialize(bolts.get(terminalBoltName).get_bolt_object().get_serialized_java(), Serializable.class);
        String dataSetType = instance.getClass().getSimpleName();

        final Referenceable datasetRef = createDataSet(dataSetType, topologyOwner, instance, stormConf, dependentEntities);
        if (datasetRef != null) {
            outputDataSets.add(datasetRef);
        }
    }

    topologyReferenceable.set("outputs", outputDataSets);
}
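The dispatch on getSimpleName() works because Storm ships each bolt inside the topology as a serialized Java object; the hook deserializes it only to learn the concrete class. A self-contained sketch of that round trip, assuming Storm's Utils.javaSerialize/javaDeserialize pair; the HBaseBolt class here is a hypothetical stand-in, not the real storm-hbase bolt:

import java.io.Serializable;

import org.apache.storm.utils.Utils;

public class BoltClassNameSketch {
    // Hypothetical stand-in for a user bolt; only Serializable matters here.
    static class HBaseBolt implements Serializable {
        private static final long serialVersionUID = 1L;
    }

    public static void main(String[] args) {
        // Serialize and deserialize the way Storm carries bolts in a topology.
        byte[] serialized = Utils.javaSerialize(new HBaseBolt());
        Serializable instance = Utils.javaDeserialize(serialized, Serializable.class);

        // Prints "HBaseBolt": the key that createDataSet switches on.
        System.out.println(instance.getClass().getSimpleName());
    }
}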
Use of org.apache.atlas.v1.model.instance.Referenceable in project atlas by apache.
From class StormAtlasHook, method addSpouts:
private void addSpouts(Map<String, SpoutSpec> spouts, Map<String, Referenceable> nodeEntities) {
    for (Map.Entry<String, SpoutSpec> entry : spouts.entrySet()) {
        final String spoutName = entry.getKey();
        Referenceable spoutReferenceable = createSpoutInstance(spoutName, entry.getValue());

        nodeEntities.put(spoutName, spoutReferenceable);
    }
}
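createSpoutInstance is not reproduced on this page. A sketch consistent with the surrounding helpers, under the assumptions that spout nodes are typed as StormDataTypes.STORM_SPOUT and carry driverClass/conf attributes (both attribute names are assumptions, not confirmed by this page):

private Referenceable createSpoutInstance(String spoutName, SpoutSpec stormSpout) {
    Referenceable spoutReferenceable = new Referenceable(StormDataTypes.STORM_SPOUT.getName());

    spoutReferenceable.set(AtlasClient.NAME, spoutName);

    // Recover the concrete spout class the same way the other helpers do.
    Serializable instance = Utils.javaDeserialize(stormSpout.get_spout_object().get_serialized_java(), Serializable.class);
    spoutReferenceable.set("driverClass", instance.getClass().getName());

    // Flatten the spout's fields into a string map, mirroring createDataSet.
    Map<String, String> flatConfigMap = StormTopologyUtil.getFieldValues(instance, true, null);
    spoutReferenceable.set("conf", flatConfigMap);

    return spoutReferenceable;
}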
Use of org.apache.atlas.v1.model.instance.Referenceable in project atlas by apache.
From class StormAtlasHook, method addTopologyInputs:
private void addTopologyInputs(Referenceable topologyReferenceable, Map<String, SpoutSpec> spouts, Map stormConf, String topologyOwner, List<Referenceable> dependentEntities) {
    final ArrayList<Referenceable> inputDataSets = new ArrayList<>();

    for (Map.Entry<String, SpoutSpec> entry : spouts.entrySet()) {
        Serializable instance = Utils.javaDeserialize(entry.getValue().get_spout_object().get_serialized_java(), Serializable.class);
        String simpleName = instance.getClass().getSimpleName();

        final Referenceable datasetRef = createDataSet(simpleName, topologyOwner, instance, stormConf, dependentEntities);
        if (datasetRef != null) {
            inputDataSets.add(datasetRef);
        }
    }

    topologyReferenceable.set("inputs", inputDataSets);
}
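Both addTopologyInputs and addTopologyOutputs attach data sets to the topology entity as a list-valued attribute. A minimal sketch of that shape, with literal stand-ins for the type and attribute constants, assuming the v1 Struct exposes a get(String) accessor:

import java.util.ArrayList;
import java.util.List;

import org.apache.atlas.v1.model.instance.Referenceable;

public class InputsAttributeSketch {
    public static void main(String[] args) {
        Referenceable topology = new Referenceable("storm_topology");

        // Data sets accumulate into a list, then land on a single attribute.
        List<Referenceable> inputDataSets = new ArrayList<>();

        Referenceable topic = new Referenceable("kafka_topic");
        topic.set("name", "orders");
        inputDataSets.add(topic);

        topology.set("inputs", inputDataSets);

        // Atlas models these as edges from the topology to each data set.
        System.out.println(((List<?>) topology.get("inputs")).size()); // prints 1
    }
}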
Use of org.apache.atlas.v1.model.instance.Referenceable in project atlas by apache.
From class StormAtlasHook, method createDataSet:
private Referenceable createDataSet(String name, String topologyOwner, Serializable instance, Map stormConf, List<Referenceable> dependentEntities) {
    Map<String, String> config = StormTopologyUtil.getFieldValues(instance, true, null);

    String clusterName = null;
    Referenceable dataSetReferenceable;

    // todo: need to redo this with a config-driven approach
    switch (name) {
        case "KafkaSpout": {
            String topicName = config.get("KafkaSpout.kafkaSpoutConfig.translator.topic");
            String uri       = config.get("KafkaSpout.kafkaSpoutConfig.kafkaProps.bootstrap.servers");

            // Fall back to the older SpoutConfig field names.
            if (StringUtils.isEmpty(topicName)) {
                topicName = config.get("KafkaSpout._spoutConfig.topic");
            }
            if (StringUtils.isEmpty(uri)) {
                uri = config.get("KafkaSpout._spoutConfig.hosts.brokerZkStr");
            }

            dataSetReferenceable = new Referenceable(StormDataTypes.KAFKA_TOPIC.getName());
            dataSetReferenceable.set("topic", topicName);
            dataSetReferenceable.set("uri", uri);

            if (StringUtils.isEmpty(topologyOwner)) {
                topologyOwner = ANONYMOUS_OWNER;
            }
            dataSetReferenceable.set(AtlasClient.OWNER, topologyOwner);
            dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getKafkaTopicQualifiedName(getClusterName(stormConf), topicName));
            dataSetReferenceable.set(AtlasClient.NAME, topicName);
        }
        break;

        case "HBaseBolt": {
            dataSetReferenceable = new Referenceable(StormDataTypes.HBASE_TABLE.getName());

            final String hbaseTableName = config.get("HBaseBolt.tableName");

            String uri = config.get("hbase.rootdir");
            if (StringUtils.isEmpty(uri)) {
                uri = hbaseTableName;
            }

            dataSetReferenceable.set("uri", uri);
            dataSetReferenceable.set(AtlasClient.NAME, hbaseTableName);
            dataSetReferenceable.set(AtlasClient.OWNER, stormConf.get("storm.kerberos.principal"));

            clusterName = extractComponentClusterName(HBaseConfiguration.create(), stormConf);
            // TODO - HBase namespace is hardcoded to 'default'; need to check how to get this, or whether it is already part of tableName
            dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHbaseTableQualifiedName(clusterName, HBASE_NAMESPACE_DEFAULT, hbaseTableName));
        }
        break;

        case "HdfsBolt": {
            dataSetReferenceable = new Referenceable(HiveMetaStoreBridge.HDFS_PATH);

            String hdfsUri = config.get("HdfsBolt.rotationActions") == null
                    ? config.get("HdfsBolt.fileNameFormat.path")
                    : config.get("HdfsBolt.rotationActions");

            final String hdfsPathStr   = config.get("HdfsBolt.fsUrl") + hdfsUri;
            final String nameServiceID = hdfsNameServiceResolver.getNameServiceIDForPath(hdfsPathStr);

            clusterName = getClusterName(stormConf);
            dataSetReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);

            if (StringUtils.isNotEmpty(nameServiceID)) {
                // Rewrite the path so the HA nameservice ID replaces raw namenode addresses.
                String updatedPath = hdfsNameServiceResolver.getPathWithNameServiceID(hdfsPathStr);

                dataSetReferenceable.set("path", updatedPath);
                dataSetReferenceable.set("nameServiceId", nameServiceID);
                dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHdfsPathQualifiedName(clusterName, updatedPath));
            } else {
                dataSetReferenceable.set("path", hdfsPathStr);
                dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHdfsPathQualifiedName(clusterName, hdfsPathStr));
            }

            dataSetReferenceable.set(AtlasClient.OWNER, stormConf.get("hdfs.kerberos.principal"));

            final Path hdfsPath = new Path(hdfsPathStr);
            dataSetReferenceable.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(hdfsPath).toString().toLowerCase());
        }
        break;

        case "HiveBolt": {
            // todo: verify whether the hive table has everything needed to retrieve an existing table
            Referenceable dbReferenceable = new Referenceable("hive_db");
            String databaseName = config.get("HiveBolt.options.databaseName");

            dbReferenceable.set(AtlasClient.NAME, databaseName);
            dbReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getDBQualifiedName(getClusterName(stormConf), databaseName));
            dbReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getClusterName(stormConf));
            dependentEntities.add(dbReferenceable);

            clusterName = extractComponentClusterName(new HiveConf(), stormConf);

            final String hiveTableName      = config.get("HiveBolt.options.tableName");
            final String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(clusterName, databaseName, hiveTableName);

            dataSetReferenceable = new Referenceable("hive_table");
            dataSetReferenceable.set(AtlasClient.NAME, hiveTableName);
            dataSetReferenceable.set(ATTRIBUTE_DB, dbReferenceable);
            dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
        }
        break;

        default:
            // TODO - what should we do for custom data sets? Not sure what name we can set here.
            return null;
    }

    dependentEntities.add(dataSetReferenceable);

    return dataSetReferenceable;
}
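The qualified-name helpers referenced above (getKafkaTopicQualifiedName, getHbaseTableQualifiedName, getHdfsPathQualifiedName) are not shown on this page. Sketches following Atlas's usual name@cluster convention; the exact casing and separators are assumptions:

// Sketches of the qualified-name helpers, assuming the name@cluster convention.
public static String getKafkaTopicQualifiedName(String clusterName, String topicName) {
    return String.format("%s@%s", topicName.toLowerCase(), clusterName);
}

public static String getHbaseTableQualifiedName(String clusterName, String nameSpace, String tableName) {
    return String.format("%s.%s@%s", nameSpace.toLowerCase(), tableName.toLowerCase(), clusterName);
}

public static String getHdfsPathQualifiedName(String clusterName, String hdfsPath) {
    return String.format("%s@%s", hdfsPath.toLowerCase(), clusterName);
}

Under these sketches, getKafkaTopicQualifiedName("prod", "Orders") would yield "orders@prod", which is the value stored in AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME above.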