Use of com.thinkbiganalytics.discovery.model.DefaultTableSchema in project kylo by Teradata.
From the class FeedIT, method getCreateFeedRequest.
/**
 * Builds the {@link FeedMetadata} request used to create a feed from the given
 * category and imported template.
 *
 * <p>The request is populated with: three NiFi properties (two on the GetFile
 * processor, one on GetTableData), a timer-driven schedule, a table definition
 * with eleven fields, their field policies and a yearly partition, table and
 * feed processing options, a data owner, two tags and an owning user.
 *
 * @param category category the feed is assigned to
 * @param template imported template whose id and name are copied onto the feed
 * @param name     feed name; its lower-cased form is used as the system feed name
 * @return the populated feed metadata
 * @throws Exception declared by the signature; the visible body does not show
 *                   which call, if any, raises it
 */
protected FeedMetadata getCreateFeedRequest(FeedCategory category, ImportTemplate template, String name) throws Exception {
    // Literals shared by several NiFi properties below.
    final String processGroupId = "305363d8-015a-1000-0000-000000000000";
    final String processGroupName = "NiFi Flow";
    final String getFileProcessorId = "1f67e296-2ff8-4b5d-0000-000000000000";
    final String getFileProcessorType = "org.apache.nifi.processors.standard.GetFile";

    // --- Basic feed identity -------------------------------------------------
    FeedMetadata feed = new FeedMetadata();
    feed.setFeedName(name);
    // Locale.ROOT keeps the system name independent of the JVM default locale
    // (e.g. a Turkish default locale would lower-case 'I' to dotless 'ı').
    feed.setSystemFeedName(name.toLowerCase(java.util.Locale.ROOT));
    feed.setCategory(category);
    feed.setTemplateId(template.getTemplateId());
    feed.setTemplateName(template.getTemplateName());
    feed.setDescription("Created by functional test");
    feed.setInputProcessorType(getFileProcessorType);

    // --- NiFi processor properties -------------------------------------------
    List<NifiProperty> properties = new ArrayList<>();

    NifiProperty fileFilter = new NifiProperty(processGroupId, getFileProcessorId, "File Filter", USERDATA1_CSV);
    fileFilter.setProcessGroupName(processGroupName);
    fileFilter.setProcessorName("Filesystem");
    fileFilter.setProcessorType(getFileProcessorType);
    fileFilter.setTemplateValue("mydata\\d{1,3}.csv");
    fileFilter.setInputProperty(true);
    fileFilter.setUserEditable(true);
    properties.add(fileFilter);

    NifiProperty inputDir = new NifiProperty(processGroupId, getFileProcessorId, "Input Directory", VAR_DROPZONE);
    inputDir.setProcessGroupName(processGroupName);
    inputDir.setProcessorName("Filesystem");
    inputDir.setProcessorType(getFileProcessorType);
    inputDir.setInputProperty(true);
    inputDir.setUserEditable(true);
    properties.add(inputDir);

    // Unlike the two GetFile properties, this one is not flagged as an input
    // property or as user editable.
    NifiProperty loadStrategy = new NifiProperty(processGroupId, "6aeabec7-ec36-4ed5-0000-000000000000", "Load Strategy", "FULL_LOAD");
    loadStrategy.setProcessGroupName(processGroupName);
    loadStrategy.setProcessorName("GetTableData");
    loadStrategy.setProcessorType("com.thinkbiganalytics.nifi.v2.ingest.GetTableData");
    properties.add(loadStrategy);

    feed.setProperties(properties);

    // --- Schedule ------------------------------------------------------------
    FeedSchedule schedule = new FeedSchedule();
    schedule.setConcurrentTasks(1);
    schedule.setSchedulingPeriod("15 sec");
    schedule.setSchedulingStrategy("TIMER_DRIVEN");
    feed.setSchedule(schedule);

    // --- Table schema --------------------------------------------------------
    DefaultTableSchema schema = new DefaultTableSchema();
    schema.setName("test1");

    List<Field> fields = new ArrayList<>();
    fields.add(newTimestampField("registration_dttm"));
    fields.add(newBigIntField("id"));
    fields.add(newStringField("first_name"));
    fields.add(newStringField("second_name"));
    fields.add(newStringField("email"));
    fields.add(newStringField("gender"));
    fields.add(newStringField("ip_address"));
    fields.add(newBinaryField("cc"));
    fields.add(newStringField("country"));
    fields.add(newStringField("birthdate"));
    fields.add(newStringField("salary"));
    schema.setFields(fields);

    TableSetup table = new TableSetup();
    // The same schema instance is registered as the table, source and feed schema.
    table.setTableSchema(schema);
    table.setSourceTableSchema(schema);
    table.setFeedTableSchema(schema);
    table.setTargetMergeStrategy("DEDUPE_AND_MERGE");
    table.setFeedFormat("ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n WITH SERDEPROPERTIES ( 'separatorChar' = ',' ,'escapeChar' = '\\\\' ,'quoteChar' = '\\'') STORED AS TEXTFILE");
    table.setTargetFormat("STORED AS ORC");

    // --- Field policies: one per schema field, in the same order -------------
    List<FieldPolicy> policies = new ArrayList<>();
    policies.add(newPolicyBuilder("registration_dttm").toPolicy());
    policies.add(newPolicyBuilder("id").toPolicy());
    policies.add(newPolicyBuilder("first_name").withStandardisation(toUpperCase).withProfile().withIndex().toPolicy());
    policies.add(newPolicyBuilder("second_name").withProfile().withIndex().toPolicy());
    policies.add(newPolicyBuilder("email").withValidation(email).toPolicy());
    policies.add(newPolicyBuilder("gender").withValidation(lookup, notNull).toPolicy());
    policies.add(newPolicyBuilder("ip_address").withValidation(ipAddress).toPolicy());
    policies.add(newPolicyBuilder("cc").withStandardisation(base64EncodeBinary).withProfile().toPolicy());
    policies.add(newPolicyBuilder("country").withStandardisation(base64EncodeBinary, base64DecodeBinary, base64EncodeString, base64DecodeString).withValidation(notNull, length).withProfile().toPolicy());
    policies.add(newPolicyBuilder("birthdate").toPolicy());
    policies.add(newPolicyBuilder("salary").toPolicy());
    table.setFieldPolicies(policies);

    // --- Partitioning and table options --------------------------------------
    List<PartitionField> partitions = new ArrayList<>();
    partitions.add(byYear("registration_dttm"));
    table.setPartitions(partitions);

    TableOptions options = new TableOptions();
    options.setCompressionFormat("SNAPPY");
    options.setAuditLogging(true);
    table.setOptions(options);
    table.setTableType("SNAPSHOT");
    feed.setTable(table);

    // --- Feed processing options, ownership and tags -------------------------
    feed.setOptions(new FeedProcessingOptions());
    feed.getOptions().setSkipHeader(true);
    feed.setDataOwner("Marketing");

    List<Tag> tags = new ArrayList<>();
    tags.add(new DefaultTag("users"));
    tags.add(new DefaultTag("registrations"));
    feed.setTags(tags);

    User owner = new User();
    owner.setSystemName("dladmin");
    owner.setDisplayName("Data Lake Admin");
    Set<String> groups = new HashSet<>();
    groups.add("admin");
    groups.add("user");
    owner.setGroups(groups);
    feed.setOwner(owner);

    return feed;
}
Aggregations