Use of com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup in project kylo by Teradata.
The class HiveColumnsUpgradeAction, method upgradeTo.
@Override
public void upgradeTo(final KyloVersion startingVersion) {
    log.info("Upgrading hive columns from version: {}", startingVersion);
    feedService.getFeeds().stream()
        .filter(feed -> Optional.ofNullable(feed.getTable())
            .map(TableSetup::getTableSchema)
            .map(TableSchema::getFields)
            .isPresent())
        .forEach(feed -> {
            final TableSchema schema = feed.getTable().getTableSchema();
            final DerivedDatasource datasource =
                datasourceProvider.findDerivedDatasource("HiveDatasource", feed.getSystemCategoryName() + "." + feed.getSystemFeedName());
            if (datasource != null) {
                log.info("Upgrading schema: {}/{}", schema.getDatabaseName(), schema.getSchemaName());
                datasource.setGenericProperties(Collections.singletonMap("columns", (Serializable) schema.getFields()));
            }
        });
}
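The Optional chain in the filter above is the same null-safe navigation that recurs in the other examples: a feed passes only when its table, table schema, and field list are all non-null. A minimal standalone sketch of the pattern, using hypothetical Feed/TableSetup/TableSchema stand-ins rather than the real Kylo model classes:

import java.util.List;
import java.util.Optional;

public class OptionalChainSketch {

    // Hypothetical stand-ins for the Kylo model classes; only the getters matter here.
    static class TableSchema { List<String> fields; List<String> getFields() { return fields; } }
    static class TableSetup { TableSchema tableSchema; TableSchema getTableSchema() { return tableSchema; } }
    static class Feed { TableSetup table; TableSetup getTable() { return table; } }

    // True only when table, schema, and field list are all non-null.
    static boolean hasFields(Feed feed) {
        return Optional.ofNullable(feed.getTable())
            .map(TableSetup::getTableSchema)
            .map(TableSchema::getFields)
            .isPresent();
    }

    public static void main(String[] args) {
        Feed feed = new Feed();
        System.out.println(hasFields(feed));   // false: no table
        feed.table = new TableSetup();
        feed.table.tableSchema = new TableSchema();
        System.out.println(hasFields(feed));   // false: schema has no field list
        feed.table.tableSchema.fields = List.of("id", "name");
        System.out.println(hasFields(feed));   // true
    }
}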
Use of com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup in project kylo by Teradata.
The class FeedHiveTableService, method updateColumnDescriptions.
/**
 * Updates the column descriptions in the Hive metastore for the specified feed.
 *
 * @param feed the feed to update
 * @throws DataAccessException if the Hive metastore cannot be updated
 */
public void updateColumnDescriptions(@Nonnull final FeedMetadata feed) {
    final List<Field> feedFields = Optional.ofNullable(feed.getTable())
        .map(TableSetup::getTableSchema)
        .map(TableSchema::getFields)
        .orElse(null);
    if (feedFields != null && !feedFields.isEmpty()) {
        final TableSchema hiveSchema = hiveService.getTableSchema(feed.getSystemCategoryName(), feed.getSystemFeedName());
        if (hiveSchema != null) {
            final Map<String, Field> hiveFieldMap = hiveSchema.getFields().stream()
                .collect(Collectors.toMap(field -> field.getName().toLowerCase(), Function.identity()));
            feedFields.stream()
                .filter(feedField -> {
                    final Field hiveField = hiveFieldMap.get(feedField.getName().toLowerCase());
                    return hiveField != null
                           && (StringUtils.isNotEmpty(feedField.getDescription()) || StringUtils.isNotEmpty(hiveField.getDescription()))
                           && !Objects.equals(feedField.getDescription(), hiveField.getDescription());
                })
                .forEach(feedField -> changeColumn(feed, feedField.getName(), feedField));
        }
    }
}
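The filter above only schedules a changeColumn call when the column actually exists in the Hive table, at least one side has a non-empty description, and the two descriptions differ. A small sketch of that predicate in isolation, assuming the commons-lang3 StringUtils that Kylo imports elsewhere:

import java.util.Objects;
import org.apache.commons.lang3.StringUtils;

public class DescriptionDiffSketch {

    // The same three-part test as the filter above, extracted for illustration:
    // the Hive column must exist, at least one description must be non-empty,
    // and the two descriptions must differ.
    static boolean needsUpdate(String feedDescription, String hiveDescription, boolean hiveColumnExists) {
        return hiveColumnExists
               && (StringUtils.isNotEmpty(feedDescription) || StringUtils.isNotEmpty(hiveDescription))
               && !Objects.equals(feedDescription, hiveDescription);
    }

    public static void main(String[] args) {
        System.out.println(needsUpdate("user id", null, true));      // true: description added in the feed
        System.out.println(needsUpdate(null, "user id", true));      // true: description cleared in the feed
        System.out.println(needsUpdate("user id", "user id", true)); // false: already in sync
        System.out.println(needsUpdate(null, "", true));             // false: both effectively empty
        System.out.println(needsUpdate("user id", null, false));     // false: column not in the Hive table
    }
}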
Use of com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup in project kylo by Teradata.
The class PropertyExpressionResolverTest, method testFeedMetadataProperties.
@Test
public void testFeedMetadataProperties() {
    FeedMetadata metadata = new FeedMetadata();
    metadata.setSystemFeedName("feedSystemName");
    metadata.setCategory(new FeedCategory());
    metadata.setTable(new TableSetup());
    metadata.getTable().setSourceTableSchema(new DefaultTableSchema());
    metadata.getTable().setTableSchema(new DefaultTableSchema());
    metadata.getTable().getSourceTableSchema().setName("sourceTableName");
    metadata.getTable().getTableSchema().setName("tableSchemaName");
    final NifiProperty prop1 = createProperty("${metadata.table.sourceTableSchema.name}");
    Assert.assertTrue(resolver.resolveExpression(metadata, prop1));
    Assert.assertEquals("sourceTableName", prop1.getValue());
}
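The test only exercises the source table schema expression, although the fixture also names the target schema. A companion assertion that would plausibly sit inside the same test body, assuming the resolver treats ${metadata.table.tableSchema.name} symmetrically (not part of the original test):

// Hypothetical companion check: the fixture sets the target schema name,
// so the symmetric expression should resolve to "tableSchemaName".
final NifiProperty prop2 = createProperty("${metadata.table.tableSchema.name}");
Assert.assertTrue(resolver.resolveExpression(metadata, prop2));
Assert.assertEquals("tableSchemaName", prop2.getValue());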
Use of com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup in project kylo by Teradata.
The class DerivedDatasourceFactory, method ensureDatasource.
public Datasource.ID ensureDatasource(TemplateProcessorDatasourceDefinition definition, FeedMetadata feedMetadata, List<NifiProperty> allProperties) {
    return metadataAccess.commit(() -> {
        List<NifiProperty> propertiesToEvalulate = new ArrayList<NifiProperty>();
        // fetch the datasource definition
        DatasourceDefinition datasourceDefinition = datasourceDefinitionProvider.findByProcessorType(definition.getProcessorType());
        if (datasourceDefinition != null) {
            // find any saved properties on the feed that match the datasource definition
            List<NifiProperty> feedProperties = feedMetadata.getProperties().stream()
                .filter(property -> matchesDefinition(definition, property) && datasourceDefinition.getDatasourcePropertyKeys().contains(property.getKey()))
                .collect(Collectors.toList());
            // resolve any ${metadata.} properties
            List<NifiProperty> resolvedFeedProperties = propertyExpressionResolver.resolvePropertyExpressions(feedProperties, feedMetadata);
            List<NifiProperty> resolvedAllProperties = propertyExpressionResolver.resolvePropertyExpressions(allProperties, feedMetadata);
            // gather every property to evaluate and resolve static values
            propertiesToEvalulate.addAll(feedProperties);
            propertiesToEvalulate.addAll(allProperties);
            propertyExpressionResolver.resolveStaticProperties(propertiesToEvalulate);
            String identityString = datasourceDefinition.getIdentityString();
            String desc = datasourceDefinition.getDescription();
            String title = datasourceDefinition.getTitle();
            PropertyExpressionResolver.ResolvedVariables identityStringPropertyResolution = propertyExpressionResolver.resolveVariables(identityString, propertiesToEvalulate);
            identityString = identityStringPropertyResolution.getResolvedString();
            PropertyExpressionResolver.ResolvedVariables titlePropertyResolution = propertyExpressionResolver.resolveVariables(title, propertiesToEvalulate);
            title = titlePropertyResolution.getResolvedString();
            if (desc != null) {
                PropertyExpressionResolver.ResolvedVariables descriptionPropertyResolution = propertyExpressionResolver.resolveVariables(desc, propertiesToEvalulate);
                desc = descriptionPropertyResolution.getResolvedString();
            }
            // if the identityString still contains unresolved variables, make the title readable and replace the identity string with the feed id
            if (propertyExpressionResolver.containsVariablesPatterns(identityString)) {
                title = propertyExpressionResolver.replaceAll(title, " {runtime variable} ");
                identityString = propertyExpressionResolver.replaceAll(identityString, feedMetadata.getId());
            }
            // if this definition is the feed's source, ensure the feed matches this datasource
            if (isCreateDatasource(datasourceDefinition, feedMetadata)) {
                Map<String, String> controllerServiceProperties = parseControllerServiceProperties(datasourceDefinition, feedProperties);
                Map<String, Object> properties = new HashMap<String, Object>(identityStringPropertyResolution.getResolvedVariables());
                properties.putAll(controllerServiceProperties);
                DerivedDatasource derivedDatasource = datasourceProvider.ensureDerivedDatasource(datasourceDefinition.getDatasourceType(), identityString, title, desc, properties);
                if (derivedDatasource != null) {
                    if ("HiveDatasource".equals(derivedDatasource.getDatasourceType())
                        && Optional.ofNullable(feedMetadata.getTable()).map(TableSetup::getTableSchema).map(TableSchema::getFields).isPresent()) {
                        derivedDatasource.setGenericProperties(Collections.singletonMap("columns", (Serializable) feedMetadata.getTable().getTableSchema().getFields()));
                    }
                    return derivedDatasource.getId();
                }
            }
            return null;
        } else {
            return null;
        }
    }, MetadataAccess.SERVICE);
}
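When the resolved identity string still references runtime-only variables (values that only exist while the flow runs), the method falls back to the feed id so the datasource identity stays stable. A rough illustration of that fallback using a plain regex, assuming Kylo's ${...} variable syntax; the real matching and replacement live in PropertyExpressionResolver:

import java.util.regex.Pattern;

public class IdentityFallbackSketch {

    // Assumed approximation of Kylo's ${...} variable pattern.
    private static final Pattern VARIABLE = Pattern.compile("\\$\\{(.*?)\\}");

    public static void main(String[] args) {
        String identityString = "hdfs:${path}/${filename}";
        String title = "HDFS ${filename}";
        String feedId = "feed-1234";
        // "${filename}" only exists at runtime, so mirror the replaceAll calls
        // in ensureDatasource: keep the title readable, pin the identity to the feed id.
        if (VARIABLE.matcher(identityString).find()) {
            title = VARIABLE.matcher(title).replaceAll(" {runtime variable} ");
            identityString = VARIABLE.matcher(identityString).replaceAll(feedId);
        }
        System.out.println(title);          // HDFS  {runtime variable}
        System.out.println(identityString); // hdfs:feed-1234/feed-1234
    }
}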
Use of com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup in project kylo by Teradata.
The class FeedIT, method getCreateFeedRequest.
protected FeedMetadata getCreateFeedRequest(FeedCategory category, ImportTemplate template, String name) throws Exception {
    FeedMetadata feed = new FeedMetadata();
    feed.setFeedName(name);
    feed.setSystemFeedName(name.toLowerCase());
    feed.setCategory(category);
    feed.setTemplateId(template.getTemplateId());
    feed.setTemplateName(template.getTemplateName());
    feed.setDescription("Created by functional test");
    feed.setInputProcessorType("org.apache.nifi.processors.standard.GetFile");

    List<NifiProperty> properties = new ArrayList<>();
    NifiProperty fileFilter = new NifiProperty("305363d8-015a-1000-0000-000000000000", "1f67e296-2ff8-4b5d-0000-000000000000", "File Filter", USERDATA1_CSV);
    fileFilter.setProcessGroupName("NiFi Flow");
    fileFilter.setProcessorName("Filesystem");
    fileFilter.setProcessorType("org.apache.nifi.processors.standard.GetFile");
    fileFilter.setTemplateValue("mydata\\d{1,3}.csv");
    fileFilter.setInputProperty(true);
    fileFilter.setUserEditable(true);
    properties.add(fileFilter);

    NifiProperty inputDir = new NifiProperty("305363d8-015a-1000-0000-000000000000", "1f67e296-2ff8-4b5d-0000-000000000000", "Input Directory", VAR_DROPZONE);
    inputDir.setProcessGroupName("NiFi Flow");
    inputDir.setProcessorName("Filesystem");
    inputDir.setProcessorType("org.apache.nifi.processors.standard.GetFile");
    inputDir.setInputProperty(true);
    inputDir.setUserEditable(true);
    properties.add(inputDir);

    NifiProperty loadStrategy = new NifiProperty("305363d8-015a-1000-0000-000000000000", "6aeabec7-ec36-4ed5-0000-000000000000", "Load Strategy", "FULL_LOAD");
    loadStrategy.setProcessGroupName("NiFi Flow");
    loadStrategy.setProcessorName("GetTableData");
    loadStrategy.setProcessorType("com.thinkbiganalytics.nifi.v2.ingest.GetTableData");
    properties.add(loadStrategy);
    feed.setProperties(properties);

    FeedSchedule schedule = new FeedSchedule();
    schedule.setConcurrentTasks(1);
    schedule.setSchedulingPeriod("15 sec");
    schedule.setSchedulingStrategy("TIMER_DRIVEN");
    feed.setSchedule(schedule);

    TableSetup table = new TableSetup();
    DefaultTableSchema schema = new DefaultTableSchema();
    schema.setName("test1");
    List<Field> fields = new ArrayList<>();
    fields.add(newTimestampField("registration_dttm"));
    fields.add(newBigIntField("id"));
    fields.add(newStringField("first_name"));
    fields.add(newStringField("second_name"));
    fields.add(newStringField("email"));
    fields.add(newStringField("gender"));
    fields.add(newStringField("ip_address"));
    fields.add(newBinaryField("cc"));
    fields.add(newStringField("country"));
    fields.add(newStringField("birthdate"));
    fields.add(newStringField("salary"));
    schema.setFields(fields);
    table.setTableSchema(schema);
    table.setSourceTableSchema(schema);
    table.setFeedTableSchema(schema);
    table.setTargetMergeStrategy("DEDUPE_AND_MERGE");
    table.setFeedFormat("ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n WITH SERDEPROPERTIES ( 'separatorChar' = ',' ,'escapeChar' = '\\\\' ,'quoteChar' = '\\'') STORED AS TEXTFILE");
    table.setTargetFormat("STORED AS ORC");

    List<FieldPolicy> policies = new ArrayList<>();
    policies.add(newPolicyBuilder("registration_dttm").toPolicy());
    policies.add(newPolicyBuilder("id").toPolicy());
    policies.add(newPolicyBuilder("first_name").withStandardisation(toUpperCase).withProfile().withIndex().toPolicy());
    policies.add(newPolicyBuilder("second_name").withProfile().withIndex().toPolicy());
    policies.add(newPolicyBuilder("email").withValidation(email).toPolicy());
    policies.add(newPolicyBuilder("gender").withValidation(lookup, notNull).toPolicy());
    policies.add(newPolicyBuilder("ip_address").withValidation(ipAddress).toPolicy());
    policies.add(newPolicyBuilder("cc").withStandardisation(base64EncodeBinary).withProfile().toPolicy());
    policies.add(newPolicyBuilder("country").withStandardisation(base64EncodeBinary, base64DecodeBinary, base64EncodeString, base64DecodeString).withValidation(notNull, length).withProfile().toPolicy());
    policies.add(newPolicyBuilder("birthdate").toPolicy());
    policies.add(newPolicyBuilder("salary").toPolicy());
    table.setFieldPolicies(policies);

    List<PartitionField> partitions = new ArrayList<>();
    partitions.add(byYear("registration_dttm"));
    table.setPartitions(partitions);

    TableOptions options = new TableOptions();
    options.setCompressionFormat("SNAPPY");
    options.setAuditLogging(true);
    table.setOptions(options);
    table.setTableType("SNAPSHOT");
    feed.setTable(table);

    feed.setOptions(new FeedProcessingOptions());
    feed.getOptions().setSkipHeader(true);
    feed.setDataOwner("Marketing");

    List<Tag> tags = new ArrayList<>();
    tags.add(new DefaultTag("users"));
    tags.add(new DefaultTag("registrations"));
    feed.setTags(tags);

    User owner = new User();
    owner.setSystemName("dladmin");
    owner.setDisplayName("Data Lake Admin");
    Set<String> groups = new HashSet<>();
    groups.add("admin");
    groups.add("user");
    owner.setGroups(groups);
    feed.setOwner(owner);
    return feed;
}
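The field helpers (newStringField, newBigIntField, newTimestampField, newBinaryField) belong to the functional-test base class and are not shown here. A plausible sketch of one, assuming Kylo's DefaultField model class with a name and a derived data type; the real helper may differ:

// Hypothetical reconstruction of a field helper, for illustration only.
// DefaultField is assumed to be com.thinkbiganalytics.discovery.model.DefaultField.
protected Field newStringField(String name) {
    DefaultField field = new DefaultField();
    field.setName(name);
    field.setDerivedDataType("string");
    return field;
}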