use of org.apache.atlas.typesystem.persistence.Id in project incubator-atlas by apache.
the class TestUtils method createInstance.
/**
* Creates an entity in the graph and does basic validation
* of the GuidMapping that was created in the process.
*
*/
public static String createInstance(MetadataService metadataService, Referenceable entity) throws Exception {
RequestContext.createContext();
String entityjson = InstanceSerialization.toJson(entity, true);
JSONArray entitiesJson = new JSONArray();
entitiesJson.put(entityjson);
CreateUpdateEntitiesResult creationResult = metadataService.createEntities(entitiesJson.toString());
Map<String, String> guidMap = creationResult.getGuidMapping().getGuidAssignments();
Map<Id, Referenceable> referencedObjects = findReferencedObjects(entity);
for (Map.Entry<Id, Referenceable> entry : referencedObjects.entrySet()) {
Id foundId = entry.getKey();
if (foundId.isUnassigned()) {
String guid = guidMap.get(entry.getKey()._getId());
Referenceable obj = entry.getValue();
loadAndDoSimpleValidation(guid, obj, metadataService);
}
}
List<String> guids = creationResult.getCreatedEntities();
if (guids != null && guids.size() > 0) {
return guids.get(guids.size() - 1);
}
return null;
}
use of org.apache.atlas.typesystem.persistence.Id in project incubator-atlas by apache.
the class QuickStart method createEntities.
void createEntities() throws Exception {
Id salesDB = database(SALES_DB, SALES_DB_DESCRIPTION, "John ETL", "hdfs://host:8000/apps/warehouse/sales");
Referenceable sd = rawStorageDescriptor("hdfs://host:8000/apps/warehouse/sales", "TextInputFormat", "TextOutputFormat", true);
List<Referenceable> salesFactColumns = ImmutableList.of(rawColumn(TIME_ID_COLUMN, "int", "time id"), rawColumn("product_id", "int", "product id"), rawColumn("customer_id", "int", "customer id", "PII_v1"), rawColumn("sales", "double", "product id", "Metric_v1"));
List<Referenceable> logFactColumns = ImmutableList.of(rawColumn("time_id", "int", "time id"), rawColumn("app_id", "int", "app id"), rawColumn("machine_id", "int", "machine id"), rawColumn("log", "string", "log data", "Log Data_v1"));
Id salesFact = table(SALES_FACT_TABLE, SALES_FACT_TABLE_DESCRIPTION, salesDB, sd, "Joe", "Managed", salesFactColumns, FACT_TRAIT);
List<Referenceable> productDimColumns = ImmutableList.of(rawColumn("product_id", "int", "product id"), rawColumn("product_name", "string", "product name"), rawColumn("brand_name", "int", "brand name"));
Id productDim = table(PRODUCT_DIM_TABLE, "product dimension table", salesDB, sd, "John Doe", "Managed", productDimColumns, "Dimension_v1");
List<Referenceable> timeDimColumns = ImmutableList.of(rawColumn("time_id", "int", "time id"), rawColumn("dayOfYear", "int", "day Of Year"), rawColumn("weekDay", "int", "week Day"));
Id timeDim = table(TIME_DIM_TABLE, "time dimension table", salesDB, sd, "John Doe", "External", timeDimColumns, "Dimension_v1");
List<Referenceable> customerDimColumns = ImmutableList.of(rawColumn("customer_id", "int", "customer id", "PII_v1"), rawColumn("name", "string", "customer name", "PII_v1"), rawColumn("address", "string", "customer address", "PII_v1"));
Id customerDim = table("customer_dim", "customer dimension table", salesDB, sd, "fetl", "External", customerDimColumns, "Dimension_v1");
Id reportingDB = database("Reporting", "reporting database", "Jane BI", "hdfs://host:8000/apps/warehouse/reporting");
Id logDB = database("Logging", "logging database", "Tim ETL", "hdfs://host:8000/apps/warehouse/logging");
Id salesFactDaily = table(SALES_FACT_DAILY_MV_TABLE, "sales fact daily materialized view", reportingDB, sd, "Joe BI", "Managed", salesFactColumns, "Metric_v1");
Id loggingFactDaily = table("log_fact_daily_mv", "log fact daily materialized view", logDB, sd, "Tim ETL", "Managed", logFactColumns, "Log Data_v1");
loadProcess(LOAD_SALES_DAILY_PROCESS, LOAD_SALES_DAILY_PROCESS_DESCRIPTION, "John ETL", ImmutableList.of(salesFact, timeDim), ImmutableList.of(salesFactDaily), "create table as select ", "plan", "id", "graph", "ETL_v1");
view(PRODUCT_DIM_VIEW, reportingDB, ImmutableList.of(productDim), "Dimension_v1", "JdbcAccess_v1");
view("customer_dim_view", reportingDB, ImmutableList.of(customerDim), "Dimension_v1", "JdbcAccess_v1");
Id salesFactMonthly = table("sales_fact_monthly_mv", "sales fact monthly materialized view", reportingDB, sd, "Jane BI", "Managed", salesFactColumns, "Metric_v1");
loadProcess("loadSalesMonthly", "hive query for monthly summary", "John ETL", ImmutableList.of(salesFactDaily), ImmutableList.of(salesFactMonthly), "create table as select ", "plan", "id", "graph", "ETL_v1");
Id loggingFactMonthly = table("logging_fact_monthly_mv", "logging fact monthly materialized view", logDB, sd, "Tim ETL", "Managed", logFactColumns, "Log Data_v1");
loadProcess("loadLogsMonthly", "hive query for monthly summary", "Tim ETL", ImmutableList.of(loggingFactDaily), ImmutableList.of(loggingFactMonthly), "create table as select ", "plan", "id", "graph", "ETL_v1");
}
use of org.apache.atlas.typesystem.persistence.Id in project incubator-atlas by apache.
the class FalconHookIT method getHDFSFeed.
private TypeUtils.Pair<String, Feed> getHDFSFeed(String feedResource, String clusterName) throws Exception {
Feed feed = loadEntity(EntityType.FEED, feedResource, "feed" + random());
org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0);
feedCluster.setName(clusterName);
STORE.publish(EntityType.FEED, feed);
String feedId = assertFeedIsRegistered(feed, clusterName);
assertFeedAttributes(feedId);
String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_CREATION.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, FalconBridge.getFeedQualifiedName(feed.getName(), clusterName));
Referenceable processEntity = atlasClient.getEntity(processId);
assertEquals(((List<Id>) processEntity.get("outputs")).get(0).getId()._getId(), feedId);
String inputId = ((List<Id>) processEntity.get("inputs")).get(0).getId()._getId();
Referenceable pathEntity = atlasClient.getEntity(inputId);
assertEquals(pathEntity.getTypeName(), HiveMetaStoreBridge.HDFS_PATH);
List<Location> locations = FeedHelper.getLocations(feedCluster, feed);
Location dataLocation = FileSystemStorage.getLocation(locations, LocationType.DATA);
assertEquals(pathEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), FalconBridge.normalize(dataLocation.getPath()));
return TypeUtils.Pair.of(feedId, feed);
}
use of org.apache.atlas.typesystem.persistence.Id in project incubator-atlas by apache.
the class HiveITBase method validateHDFSPaths.
protected void validateHDFSPaths(Referenceable processReference, String attributeName, String... testPaths) throws Exception {
List<Id> hdfsPathRefs = (List<Id>) processReference.get(attributeName);
for (String testPath : testPaths) {
final Path path = new Path(testPath);
final String testPathNormed = lower(path.toString());
String hdfsPathId = assertHDFSPathIsRegistered(testPathNormed);
Assert.assertEquals(hdfsPathRefs.get(0)._getId(), hdfsPathId);
Referenceable hdfsPathRef = atlasClient.getEntity(hdfsPathId);
Assert.assertEquals(hdfsPathRef.get("path"), testPathNormed);
Assert.assertEquals(hdfsPathRef.get(NAME), Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
Assert.assertEquals(hdfsPathRef.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), testPathNormed);
}
}
use of org.apache.atlas.typesystem.persistence.Id in project incubator-atlas by apache.
the class HiveHookIT method validateProcess.
private Referenceable validateProcess(HiveHook.HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
String processId = assertProcessIsRegistered(event, inputTables, outputTables);
Referenceable process = atlasClient.getEntity(processId);
if (inputTables == null) {
Assert.assertNull(process.get(INPUTS));
} else {
Assert.assertEquals(((List<Referenceable>) process.get(INPUTS)).size(), inputTables.size());
validateInputTables(process, inputTables);
}
if (outputTables == null) {
Assert.assertNull(process.get(OUTPUTS));
} else {
Assert.assertEquals(((List<Id>) process.get(OUTPUTS)).size(), outputTables.size());
validateOutputTables(process, outputTables);
}
return process;
}
Aggregations