use of com.thinkbiganalytics.discovery.model.DefaultTag in project kylo by Teradata.
the class FeedIT method getCreateFeedRequest.
/**
 * Builds the feed-creation request used by the functional tests: a GetFile-driven feed with three
 * NiFi properties, a 15-second timer schedule, an eleven-column table definition with per-field
 * policies, two tags and an owner. The request is only assembled here, not submitted.
 *
 * @param category category the new feed is assigned to
 * @param template imported template that supplies the template id and name
 * @param name     feed name; its lower-cased form is used as the system feed name
 * @return the populated {@link FeedMetadata}
 * @throws Exception declared by the existing signature and kept so that callers and overriders still compile
 */
protected FeedMetadata getCreateFeedRequest(FeedCategory category, ImportTemplate template, String name) throws Exception {
    final String getFileType = "org.apache.nifi.processors.standard.GetFile";
    final String groupName = "NiFi Flow";
    // First constructor argument shared by all three properties and second argument shared by the
    // two "Filesystem" properties (presumably process-group id and processor id - confirm against NifiProperty).
    final String groupId = "305363d8-015a-1000-0000-000000000000";
    final String filesystemId = "1f67e296-2ff8-4b5d-0000-000000000000";

    FeedMetadata feed = new FeedMetadata();
    feed.setFeedName(name);
    // Locale.ROOT keeps the derived system name independent of the JVM's default locale
    // (a Turkish default locale, for example, lower-cases 'I' to a dotless i).
    feed.setSystemFeedName(name.toLowerCase(java.util.Locale.ROOT));
    feed.setCategory(category);
    feed.setTemplateId(template.getTemplateId());
    feed.setTemplateName(template.getTemplateName());
    feed.setDescription("Created by functional test");
    feed.setInputProcessorType(getFileType);

    // --- NiFi properties: index 0 is the file filter, which the edit tests modify ---
    List<NifiProperty> properties = new ArrayList<>();

    NifiProperty fileFilter = new NifiProperty(groupId, filesystemId, "File Filter", USERDATA1_CSV);
    fileFilter.setProcessGroupName(groupName);
    fileFilter.setProcessorName("Filesystem");
    fileFilter.setProcessorType(getFileType);
    fileFilter.setTemplateValue("mydata\\d{1,3}.csv");
    fileFilter.setInputProperty(true);
    fileFilter.setUserEditable(true);
    properties.add(fileFilter);

    NifiProperty inputDir = new NifiProperty(groupId, filesystemId, "Input Directory", VAR_DROPZONE);
    inputDir.setProcessGroupName(groupName);
    inputDir.setProcessorName("Filesystem");
    inputDir.setProcessorType(getFileType);
    inputDir.setInputProperty(true);
    inputDir.setUserEditable(true);
    properties.add(inputDir);

    NifiProperty loadStrategy = new NifiProperty(groupId, "6aeabec7-ec36-4ed5-0000-000000000000", "Load Strategy", "FULL_LOAD");
    loadStrategy.setProcessGroupName(groupName);
    loadStrategy.setProcessorName("GetTableData");
    loadStrategy.setProcessorType("com.thinkbiganalytics.nifi.v2.ingest.GetTableData");
    properties.add(loadStrategy);

    feed.setProperties(properties);

    // --- Schedule ---
    FeedSchedule schedule = new FeedSchedule();
    schedule.setConcurrentTasks(1);
    schedule.setSchedulingPeriod("15 sec");
    schedule.setSchedulingStrategy("TIMER_DRIVEN");
    feed.setSchedule(schedule);

    // --- Table schema: the same schema instance is used as table, source and feed schema ---
    TableSetup table = new TableSetup();
    DefaultTableSchema schema = new DefaultTableSchema();
    schema.setName("test1");
    List<Field> fields = new ArrayList<>();
    fields.add(newTimestampField("registration_dttm"));
    fields.add(newBigIntField("id"));
    fields.add(newStringField("first_name"));
    fields.add(newStringField("second_name"));
    fields.add(newStringField("email"));
    fields.add(newStringField("gender"));
    fields.add(newStringField("ip_address"));
    fields.add(newBinaryField("cc"));
    fields.add(newStringField("country"));
    fields.add(newStringField("birthdate"));
    fields.add(newStringField("salary"));
    schema.setFields(fields);
    table.setTableSchema(schema);
    table.setSourceTableSchema(schema);
    table.setFeedTableSchema(schema);
    table.setTargetMergeStrategy("DEDUPE_AND_MERGE");
    table.setFeedFormat("ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n WITH SERDEPROPERTIES ( 'separatorChar' = ',' ,'escapeChar' = '\\\\' ,'quoteChar' = '\\'') STORED AS TEXTFILE");
    table.setTargetFormat("STORED AS ORC");

    // --- Field policies, one per field and in the same order as the fields above ---
    // NOTE: the same base64EncodeBinary reference is handed to both the "cc" and the "country" policy.
    List<FieldPolicy> policies = new ArrayList<>();
    policies.add(newPolicyBuilder("registration_dttm").toPolicy());
    policies.add(newPolicyBuilder("id").toPolicy());
    policies.add(newPolicyBuilder("first_name").withStandardisation(toUpperCase).withProfile().withIndex().toPolicy());
    policies.add(newPolicyBuilder("second_name").withProfile().withIndex().toPolicy());
    policies.add(newPolicyBuilder("email").withValidation(email).toPolicy());
    policies.add(newPolicyBuilder("gender").withValidation(lookup, notNull).toPolicy());
    policies.add(newPolicyBuilder("ip_address").withValidation(ipAddress).toPolicy());
    policies.add(newPolicyBuilder("cc").withStandardisation(base64EncodeBinary).withProfile().toPolicy());
    policies.add(newPolicyBuilder("country").withStandardisation(base64EncodeBinary, base64DecodeBinary, base64EncodeString, base64DecodeString).withValidation(notNull, length).withProfile().toPolicy());
    policies.add(newPolicyBuilder("birthdate").toPolicy());
    policies.add(newPolicyBuilder("salary").toPolicy());
    table.setFieldPolicies(policies);

    // --- Partitioning and table options ---
    List<PartitionField> partitions = new ArrayList<>();
    partitions.add(byYear("registration_dttm"));
    table.setPartitions(partitions);
    TableOptions options = new TableOptions();
    options.setCompressionFormat("SNAPPY");
    options.setAuditLogging(true);
    table.setOptions(options);
    table.setTableType("SNAPSHOT");
    feed.setTable(table);

    feed.setOptions(new FeedProcessingOptions());
    feed.getOptions().setSkipHeader(true);
    feed.setDataOwner("Marketing");

    // --- Tags: kept in a mutable list because the edit tests append to feed.getTags() ---
    List<Tag> tags = new ArrayList<>();
    tags.add(new DefaultTag("users"));
    tags.add(new DefaultTag("registrations"));
    feed.setTags(tags);

    // --- Owner ---
    User owner = new User();
    owner.setSystemName("dladmin");
    owner.setDisplayName("Data Lake Admin");
    Set<String> groups = new HashSet<>();
    groups.add("admin");
    groups.add("user");
    owner.setGroups(groups);
    feed.setOwner(owner);

    return feed;
}
use of com.thinkbiganalytics.discovery.model.DefaultTag in project kylo by Teradata.
the class FeedIT method testEditFeed.
/**
 * Creates a feed, re-submits it with a series of edits, and verifies that the feed then has two
 * versions whose difference patch contains an entry for every edit made here.
 */
@Test
public void testEditFeed() throws Exception {
    // Set up the environment
    prepare();
    final FeedCategory category = createCategory(CATEGORY_NAME);
    final ImportTemplate template = importDataIngestTemplate();

    // Initial creation
    FeedMetadata feed = getCreateFeedRequest(category, template, createNewFeedName());
    feed.setDescription("Test feed");
    feed.setDataOwner("Some Guy");
    FeedMetadata created = createFeed(feed).getFeedMetadata();
    Assert.assertEquals(feed.getFeedName(), created.getFeedName());
    Assert.assertEquals(feed.getDataOwner(), created.getDataOwner());

    // Turn the same request into an edit of the feed that was just created
    feed.setId(created.getId());
    feed.setFeedId(created.getFeedId());
    feed.setIsNew(false);
    feed.setDescription(null);
    feed.setDataOwner("Some Other Guy");
    feed.getProperties().get(0).setValue("some-file.csv");

    // Field-policy edits; indices follow the policy order built in getCreateFeedRequest
    final List<FieldPolicy> fieldPolicies = feed.getTable().getFieldPolicies();

    // id: gains a validator and becomes the primary key
    fieldPolicies.get(1).getValidation().add(notNull);
    feed.getTable().setPrimaryKeyFields("id");

    // first_name: profiling switched off
    fieldPolicies.get(2).setProfile(false);

    // second_name: indexing switched off, a standardiser added
    final FieldPolicy secondNamePolicy = fieldPolicies.get(3);
    secondNamePolicy.setIndex(false);
    secondNamePolicy.getStandardization().add(toUpperCase);

    // email: every validator removed
    fieldPolicies.get(4).setValidation(Collections.emptyList());

    // gender: first property of the first validator changed, profiling and indexing switched on
    final FieldPolicy genderPolicy = fieldPolicies.get(5);
    FieldValidationRule genderFirstRule = genderPolicy.getValidation().get(0);
    genderFirstRule.getProperties().get(0).setValue("new value");
    genderPolicy.setProfile(true);
    genderPolicy.setIndex(true);

    // cc: first property of the first standardiser changed
    FieldStandardizationRule ccFirstRule = fieldPolicies.get(7).getStandardization().get(0);
    ccFirstRule.getProperties().get(0).setValue("STRING");

    // Feed-level edits
    feed.getOptions().setSkipHeader(false);
    feed.getTable().setTargetMergeStrategy("ROLLING_SYNC");
    feed.getTags().add(new DefaultTag("updated"));
    feed.getSchedule().setSchedulingPeriod("20 sec");

    // Submit the edit
    FeedMetadata updated = createFeed(feed).getFeedMetadata();
    Assert.assertEquals(feed.getFeedName(), updated.getFeedName());
    Assert.assertEquals(feed.getDescription(), updated.getDescription());

    // Exactly two versions are expected; fetch the difference between entries 1 and 0
    final List<EntityVersion> versions = getVersions(feed.getFeedId()).getVersions();
    Assert.assertEquals(2, versions.size());
    final JsonNode patch = getVersionDiff(feed.getFeedId(), versions.get(1).getId(), versions.get(0).getId())
        .getDifference()
        .getPatch();
    final ArrayNode diffs = (ArrayNode) patch;

    // Feed-level changes
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/properties/0/value", "some-file.csv")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/schedule/schedulingPeriod", "20 sec")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("remove", "/description")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("add", "/tags/1")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/dataOwner", "Some Other Guy")));

    // Field-policy changes
    Assert.assertTrue(versionPatchContains(diffs, new Diff("add", "/table/fieldPolicies/1/validation/0")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/2/profile", "false")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/3/index", "false")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("add", "/table/fieldPolicies/3/standardization/0")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("remove", "/table/fieldPolicies/4/validation/0")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/5/profile", "true")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/5/index", "true")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/5/validation/0/properties/0/value", "new value")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/7/standardization/0/properties/0/value", "STRING")));
    // NOTE(review): policy 8 is never edited directly above. getCreateFeedRequest passes the same
    // base64EncodeBinary reference to policies 7 and 8, so presumably the change surfaces under both - confirm.
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldPolicies/8/standardization/0/properties/0/value", "STRING")));

    // Table- and option-level changes
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/targetMergeStrategy", "ROLLING_SYNC")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/table/fieldIndexString", "first_name,gender")));
    Assert.assertTrue(versionPatchContains(diffs, new Diff("replace", "/options/skipHeader", "false")));
}
use of com.thinkbiganalytics.discovery.model.DefaultTag in project kylo by Teradata.
the class IntegrationTestBase method makeCreateFeedRequest.
/**
 * Builds a minimal feed-creation request: a GetFile-driven feed with a single "File Filter"
 * property, a 15-second timer schedule, two tags and an owner. The request is only assembled
 * here, not submitted.
 *
 * @param category category the new feed is assigned to; its name is also embedded in one of the tags
 * @param template imported template that supplies the template id and name
 * @param feedName feed name; its lower-cased form is used as the system feed name
 * @param testFile value given to the "File Filter" property
 * @return the populated {@link FeedMetadata}
 */
protected FeedMetadata makeCreateFeedRequest(FeedCategory category, ImportTemplate template, String feedName, String testFile) {
    final String getFileType = "org.apache.nifi.processors.standard.GetFile";

    FeedMetadata feed = new FeedMetadata();
    feed.setFeedName(feedName);
    // Locale.ROOT keeps the derived system name independent of the JVM's default locale
    // (a Turkish default locale, for example, lower-cases 'I' to a dotless i).
    feed.setSystemFeedName(feedName.toLowerCase(java.util.Locale.ROOT));
    feed.setCategory(category);
    feed.setTemplateId(template.getTemplateId());
    feed.setTemplateName(template.getTemplateName());
    feed.setDescription("Created by functional test");
    feed.setInputProcessorType(getFileType);

    // --- Single NiFi property: the file filter ---
    List<NifiProperty> properties = new ArrayList<>();
    NifiProperty fileFilter = new NifiProperty("764d053d-015e-1000-b8a2-763cd17080e1", "cffa8f24-d097-3c7a-7d04-26b7feff81ab", "File Filter", testFile);
    fileFilter.setProcessGroupName("NiFi Flow");
    fileFilter.setProcessorName("GetFile");
    fileFilter.setProcessorType(getFileType);
    fileFilter.setTemplateValue("mydata\\d{1,3}.csv");
    fileFilter.setInputProperty(true);
    fileFilter.setUserEditable(true);
    properties.add(fileFilter);
    feed.setProperties(properties);

    // --- Schedule ---
    FeedSchedule schedule = new FeedSchedule();
    schedule.setConcurrentTasks(1);
    schedule.setSchedulingPeriod("15 sec");
    schedule.setSchedulingStrategy("TIMER_DRIVEN");
    feed.setSchedule(schedule);

    feed.setDataOwner("Marketing");

    // --- Tags (mutable list, as in the original) ---
    List<Tag> tags = new ArrayList<>();
    tags.add(new DefaultTag("functional tests"));
    tags.add(new DefaultTag("for category " + category.getName()));
    feed.setTags(tags);

    // --- Owner ---
    User owner = new User();
    owner.setSystemName("dladmin");
    owner.setDisplayName("Data Lake Admin");
    Set<String> groups = new HashSet<>();
    groups.add("admin");
    groups.add("user");
    owner.setGroups(groups);
    feed.setOwner(owner);

    return feed;
}
use of com.thinkbiganalytics.discovery.model.DefaultTag in project kylo by Teradata.
the class FeedITBase method editFeed.
/**
 * Creates a feed, re-submits it with edits, checks that two versions now exist and passes the
 * difference patch between them to {@code assertEditChanges}. The edited file name, the
 * field-policy edits and the patch assertions come from {@code getEditedFileName},
 * {@code editFieldPolicies} and {@code assertEditChanges}, none of which are defined in this block.
 *
 * @throws Exception if any of the helper calls fails
 */
protected void editFeed() throws Exception {
    // Set up the environment
    prepare();
    final FeedCategory category = createCategory(CATEGORY_NAME);
    final ImportTemplate template = importDataIngestTemplate();

    // Initial creation
    FeedMetadata feed = getCreateFeedRequest(category, template, createNewFeedName());
    feed.setDescription("Test feed");
    feed.setDataOwner("Some Guy");
    FeedMetadata created = createFeed(feed).getFeedMetadata();
    Assert.assertEquals(feed.getFeedName(), created.getFeedName());
    Assert.assertEquals(feed.getDataOwner(), created.getDataOwner());

    // Turn the same request into an edit of the feed that was just created
    feed.setId(created.getId());
    feed.setFeedId(created.getFeedId());
    feed.setIsNew(false);
    feed.setDescription(null);
    feed.setDataOwner("Some Other Guy");
    feed.getProperties().get(0).setValue(getEditedFileName());
    editFieldPolicies(feed.getTable().getFieldPolicies());
    feed.getTable().setPrimaryKeyFields("id");
    feed.getOptions().setSkipHeader(false);
    feed.getTable().setTargetMergeStrategy("ROLLING_SYNC");
    feed.getTags().add(new DefaultTag("updated"));
    feed.getSchedule().setSchedulingPeriod("20 sec");

    // Submit the edit
    FeedMetadata updated = createFeed(feed).getFeedMetadata();
    Assert.assertEquals(feed.getFeedName(), updated.getFeedName());
    Assert.assertEquals(feed.getDescription(), updated.getDescription());

    // Exactly two versions are expected; fetch the difference between entries 1 and 0
    final List<EntityVersion> versions = getVersions(feed.getFeedId()).getVersions();
    Assert.assertEquals(2, versions.size());
    final JsonNode patch = getVersionDiff(feed.getFeedId(), versions.get(1).getId(), versions.get(0).getId())
        .getDifference()
        .getPatch();
    assertEditChanges((ArrayNode) patch);
}
use of com.thinkbiganalytics.discovery.model.DefaultTag in project kylo by Teradata.
the class CsvFeedIT method getTags.
/**
 * Supplies the tags for the feed: "users" followed by "registrations".
 *
 * @return a new, mutable list holding the two tags in that order
 */
protected List<Tag> getTags() {
    final Tag users = new DefaultTag("users");
    final Tag registrations = new DefaultTag("registrations");

    final List<Tag> result = new ArrayList<>();
    result.add(users);
    result.add(registrations);
    return result;
}
Aggregations