use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.
the class AbstractMergeTable method onTrigger.
/**
 * Merges or syncs a source table into a target table using the merge strategy configured on the processor.
 *
 * <p>When blocking is enabled and another flow file already holds the blocking key, the flow file is
 * stamped with a blocked-start time (first time only), penalized and routed to {@code REL_BLOCKED}.
 * Otherwise the processor properties are evaluated against the flow file, the selected strategy is
 * executed over a connection obtained from {@code getConnection(context)}, and the flow file is routed
 * to {@code REL_SUCCESS}, or to the failure relationship if validation fails or any exception is thrown.
 *
 * <p>{@code release(blockingValue)} is called on every path that ran past the blocking check
 * (presumably to free the blocking-cache entry; the helper is not visible here).
 *
 * @param context the process context supplying the processor properties
 * @param session the session used to fetch, update and route the flow file
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String blockingValue = context.getProperty(BLOCKING_KEY).evaluateAttributeExpressions(flowFile).getValue();
    final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());

    if (blocking && blockingCache.putIfAbsent(blockingValue, flowFileId) != null) {
        // Only stamp the start time the first time this flow file is blocked.
        if (StringUtils.isBlank(flowFile.getAttribute(BLOCKED_START_TIME))) {
            flowFile = session.putAttribute(flowFile, BLOCKED_START_TIME, String.valueOf(System.currentTimeMillis()));
            getLogger().info("Transferring Flow file {} to blocked relationship", new Object[] { flowFile });
        }
        // penalize the flow file and transfer to BLOCKED
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_BLOCKED);
        return;
    }

    // Add Blocking time to flow file if this was a blocked flowfile.
    if (blocking && StringUtils.isNotBlank(flowFile.getAttribute(BLOCKED_START_TIME))) {
        final String blockedStartTime = flowFile.getAttribute(BLOCKED_START_TIME);
        try {
            final long blockTime = System.currentTimeMillis() - Long.parseLong(blockedStartTime);
            getLogger().info("Processing Blocked flow file {}. This was blocked for {} ms", new Object[] { flowFile, blockTime });
            flowFile = session.putAttribute(flowFile, BLOCKED_TIME, blockTime + " ms");
        } catch (NumberFormatException e) {
            // The blocked time is informational only, so keep processing, but do not hide the bad value.
            logger.warn("Ignoring non-numeric blocked start time '{}' on {}", new Object[] { blockedStartTime, flowFile });
        }
    }

    final String provenanceStatusKey = context.getName() + " Execution Status";
    final String partitionSpecString = context.getProperty(PARTITION_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceSchema = context.getProperty(SOURCE_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceTable = context.getProperty(SOURCE_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    final String targetSchema = context.getProperty(TARGET_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final String targetTable = context.getProperty(TARGET_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    final String feedPartitionValue = context.getProperty(FEED_PARTITION).evaluateAttributeExpressions(flowFile).getValue();
    String mergeStrategyValue = context.getProperty(MERGE_STRATEGY).evaluateAttributeExpressions(flowFile).getValue();
    final String hiveConfigurations = context.getProperty(HIVE_CONFIGURATIONS).evaluateAttributeExpressions(flowFile).getValue();
    final boolean resetHive = context.getProperty(RESET_HIVE).asBoolean();

    // Never null: a missing/empty property (or a null parse result) falls back to an empty array.
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .map(ColumnSpec::createFromString)
        .orElse(new ColumnSpec[0]);

    if (STRATEGY_PK_MERGE.equals(mergeStrategyValue) && columnSpecs.length == 0) {
        getLog().error("Missing required field specification for PK merge feature");
        flowFile = session.putAttribute(flowFile, provenanceStatusKey, "Failed: Missing required field specification for PK merge feature");
        release(blockingValue);
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }

    // Maintain default for backward compatibility
    if (StringUtils.isEmpty(mergeStrategyValue)) {
        mergeStrategyValue = STRATEGY_DEDUPE_MERGE;
    }

    logger.info("Merge strategy: " + mergeStrategyValue + " Using Source: " + sourceTable + " Target: " + targetTable + " feed partition:" + feedPartitionValue + " partSpec: " + partitionSpecString);

    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection conn = getConnection(context)) {
        final TableMergeSyncSupport mergeSupport = new TableMergeSyncSupport(conn);
        if (resetHive) {
            mergeSupport.resetHiveConf();
        }
        mergeSupport.enableDynamicPartitions();
        if (StringUtils.isNotEmpty(hiveConfigurations)) {
            // Individual Hive settings are separated by a literal '|'.
            mergeSupport.setHiveConf(hiveConfigurations.split("\\|"));
        }

        final PartitionSpec partitionSpec = new PartitionSpec(partitionSpecString);
        if (STRATEGY_DEDUPE_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, true);
        } else if (STRATEGY_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, false);
        } else if (STRATEGY_SYNC.equals(mergeStrategyValue)) {
            mergeSupport.doSync(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue);
        } else if (STRATEGY_ROLLING_SYNC.equals(mergeStrategyValue)) {
            mergeSupport.doRollingSync(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue);
        } else if (STRATEGY_PK_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doPKMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, columnSpecs);
        } else {
            throw new UnsupportedOperationException("Failed to resolve the merge strategy");
        }

        session.getProvenanceReporter().modifyContent(flowFile, "Execution completed", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        flowFile = session.putAttribute(flowFile, provenanceStatusKey, "Successful");
        release(blockingValue);
        logger.info("Execution completed: " + stopWatch.getElapsed(TimeUnit.MILLISECONDS) + " Merge strategy: " + mergeStrategyValue + " Using Source: " + sourceTable + " Target: " + targetTable + " feed partition:" + feedPartitionValue + " partSpec: " + partitionSpecString);
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        // Processor boundary: any failure (connection, SQL, unsupported strategy) routes the flow file to failure.
        logger.error("Unable to execute merge doMerge for {} due to {}; routing to failure", new Object[] { flowFile, e }, e);
        flowFile = session.putAttribute(flowFile, provenanceStatusKey, "Failed: " + e.getMessage());
        release(blockingValue);
        session.transfer(flowFile, REL_FAILURE);
    }
}
use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.
the class CreateElasticsearchBackedHiveTable method onTrigger.
/**
 * Builds the HQL statements for an Elasticsearch-backed Hive table from the processor properties
 * and hands them to {@code executeStatements} together with the configured Thrift service.
 *
 * <p>NOTE(review): the {@code validateArrayProperty}/{@code validateStringProperty} helpers are not
 * visible here and this method does not inspect any result from them, so processing continues after
 * each call — confirm the helpers abort appropriately on invalid input.
 *
 * @param context the process context supplying the processor properties
 * @param session the session used to fetch the flow file and passed on to the helpers
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String jarUrl = context.getProperty(JAR_URL).evaluateAttributeExpressions(flowFile).getValue();
    final String useWan = context.getProperty(USE_WAN).getValue();
    final String autoIndex = context.getProperty(AUTO_CREATE_INDEX).getValue();
    final String idField = context.getProperty(ID_FIELD).evaluateAttributeExpressions(flowFile).getValue();

    // Never null: a missing/empty property falls back to an empty array.
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .map(ColumnSpec::createFromString)
        .orElse(new ColumnSpec[0]);
    validateArrayProperty(FIELD_SPECIFICATION.getDisplayName(), columnSpecs, session, flowFile);

    final String feedName = context.getProperty(IngestProperties.FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FEED_NAME.getDisplayName(), feedName, session, flowFile);

    final String categoryName = context.getProperty(IngestProperties.FEED_CATEGORY).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FEED_CATEGORY.getDisplayName(), categoryName, session, flowFile);

    final String nodes = context.getProperty(NODES).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(NODES.getDisplayName(), nodes, session, flowFile);

    final String indexString = context.getProperty(FIELD_INDEX_STRING).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FIELD_INDEX_STRING.getDisplayName(), indexString, session, flowFile);

    final String feedRoot = context.getProperty(FEED_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    // Validate the feed root itself; the previous code re-validated indexString here by mistake.
    validateStringProperty(FEED_ROOT.getDisplayName(), feedRoot, session, flowFile);

    final List<String> hiveStatements = getHQLStatements(columnSpecs, nodes, feedRoot, feedName, categoryName, useWan, autoIndex, idField, jarUrl, indexString);
    final ThriftService thriftService = context.getProperty(THRIFT_SERVICE).asControllerService(ThriftService.class);
    executeStatements(context, session, flowFile, hiveStatements.toArray(new String[hiveStatements.size()]), thriftService);
}
use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.
the class CreateElasticsearchBackedHiveTable method getHQLStatements.
/**
 * Builds the ordered list of Hive statements for the Elasticsearch-backed table.
 *
 * <p>Only the columns whose names appear in {@code indexFieldString} are passed to
 * {@code TableType.MASTER.deriveColumnSpecification}. Names are compared case-insensitively using
 * {@code Locale.ROOT}, and surrounding whitespace in the comma-separated list is ignored, so
 * {@code "id, Name"} selects the columns {@code id} and {@code name}.
 *
 * @param columnSpecs      all candidate columns
 * @param nodes            passed through to {@code generateHQL}
 * @param locationRoot     passed through to {@code generateHQL}
 * @param feedName         passed through to {@code generateHQL}
 * @param categoryName     passed through to {@code generateHQL}
 * @param useWan           passed through to {@code generateHQL}
 * @param autoIndex        passed through to {@code generateHQL}
 * @param idField          passed through to {@code generateHQL}
 * @param jarUrl           when non-empty, an {@code ADD JAR} statement for it is emitted first
 * @param indexFieldString comma-separated names of the columns to include; must not be {@code null}
 * @return the statements to execute, in order: the optional {@code ADD JAR}, then the generated HQL
 */
public List<String> getHQLStatements(ColumnSpec[] columnSpecs, String nodes, String locationRoot, String feedName, String categoryName, String useWan, String autoIndex, String idField, String jarUrl, String indexFieldString) {
    final ColumnSpec[] partitions = {};
    final TableType tableType = TableType.MASTER;

    // Locale.ROOT keeps case folding stable regardless of the JVM default locale (e.g. Turkish dotless i).
    final List<String> indexFields = Arrays.stream(indexFieldString.split(","))
        .map(field -> field.trim().toLowerCase(java.util.Locale.ROOT))
        .collect(Collectors.toList());

    final ColumnSpec[] indexCols = Arrays.stream(columnSpecs)
        .filter(spec -> indexFields.contains(spec.getName().toLowerCase(java.util.Locale.ROOT)))
        .toArray(ColumnSpec[]::new);

    final String columnsSQL = tableType.deriveColumnSpecification(indexCols, partitions, "");
    final String hql = generateHQL(columnsSQL, nodes, locationRoot, feedName, categoryName, useWan, autoIndex, idField);

    final List<String> hiveStatements = new ArrayList<>();
    if (jarUrl != null && !jarUrl.isEmpty()) {
        // The jar must be registered before the table definition that depends on it.
        hiveStatements.add("ADD JAR " + jarUrl);
    }
    hiveStatements.add(hql);
    return hiveStatements;
}
use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.
the class RegisterFeedTables method onTrigger.
/**
 * Registers the feed's Hive tables using the column, partition and format specifications
 * configured on the processor.
 *
 * <p>The flow file is routed to the failure relationship when the field specification is missing
 * (and no feed table override is set), when the feed or category name is missing, when registration
 * reports {@code false}, or when a {@link ProcessException} or {@link SQLException} is thrown while
 * working with the connection. Otherwise it is routed to {@code REL_SUCCESS}.
 *
 * @param context the process context supplying the processor properties
 * @param session the session used to fetch and route the flow file
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // Verify flow file exists
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // Verify properties and attributes
    final String feedFormatOptions = Optional.ofNullable(context.getProperty(FEED_FORMAT_SPECS).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .orElse(DEFAULT_FEED_FORMAT_OPTIONS);
    final String targetFormatOptions = Optional.ofNullable(context.getProperty(TARGET_FORMAT_SPECS).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .orElse(DEFAULT_STORAGE_FORMAT);
    final String feedTableProperties = context.getProperty(FEED_TBLPROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final String targetTableProperties = context.getProperty(TARGET_TBLPROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final ColumnSpec[] partitions = Optional.ofNullable(context.getProperty(PARTITION_SPECS).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .map(ColumnSpec::createFromString)
        .orElse(new ColumnSpec[0]);
    final String tableType = context.getProperty(TABLE_TYPE).getValue();
    final String feedTableOverride = context.getProperty(FEED_TABLE_OVERRIDE).evaluateAttributeExpressions(flowFile).getValue();

    // Never null: a missing/empty property (or a null parse result) falls back to an empty array.
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .map(ColumnSpec::createFromString)
        .orElse(new ColumnSpec[0]);
    if (StringUtils.isEmpty(feedTableOverride) && columnSpecs.length == 0) {
        getLog().error("Missing field specification");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }

    ColumnSpec[] feedColumnSpecs = Optional.ofNullable(context.getProperty(FEED_FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue())
        .filter(StringUtils::isNotEmpty)
        .map(ColumnSpec::createFromString)
        .orElse(new ColumnSpec[0]);
    if (feedColumnSpecs.length == 0) {
        // Backwards compatibility with older templates we set the source and target to the same
        feedColumnSpecs = columnSpecs;
    }

    final String entity = context.getProperty(IngestProperties.FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();
    if (entity == null || entity.isEmpty()) {
        getLog().error("Missing feed name");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }

    final String source = context.getProperty(IngestProperties.FEED_CATEGORY).evaluateAttributeExpressions(flowFile).getValue();
    if (source == null || source.isEmpty()) {
        getLog().error("Missing category name");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }

    final String feedRoot = context.getProperty(FEED_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    final String profileRoot = context.getProperty(PROFILE_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    final String masterRoot = context.getProperty(MASTER_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    final TableRegisterConfiguration config = new TableRegisterConfiguration(feedRoot, profileRoot, masterRoot);

    // Register the tables
    final ThriftService thriftService = context.getProperty(THRIFT_SERVICE).asControllerService(ThriftService.class);
    try (final Connection conn = thriftService.getConnection()) {
        final TableRegisterSupport register = new TableRegisterSupport(conn, config);
        final boolean result;
        if (ALL_TABLES.equals(tableType)) {
            result = register.registerStandardTables(source, entity, feedColumnSpecs, feedFormatOptions, targetFormatOptions, partitions, columnSpecs, feedTableProperties, targetTableProperties, feedTableOverride);
        } else {
            result = register.registerTable(source, entity, feedColumnSpecs, feedFormatOptions, targetFormatOptions, partitions, columnSpecs, feedTableProperties, targetTableProperties, TableType.valueOf(tableType), true, feedTableOverride);
        }
        final Relationship relnResult = (result ? REL_SUCCESS : REL_FAILURE);
        session.transfer(flowFile, relnResult);
    } catch (final ProcessException | SQLException e) {
        // Pass the exception as the throwable argument so the stack trace is kept in the log.
        getLog().error("Unable to obtain connection for {} due to {}; routing to failure", new Object[] { flowFile, e }, e);
        session.transfer(flowFile, REL_FAILURE);
    }
}
use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.
the class TableRegisterSupport method adjustInvalidColumnSpec.
/**
 * Returns copies of {@code columnSpecs} whose data types are aligned with the feed columns.
 *
 * <p>Each element of {@code columnSpecs} is copied. When a copy has a non-blank "other column name"
 * that equals the name of one of the {@code feedColumnSpecs}, the copy's data type is replaced with
 * that feed column's data type. The input arrays themselves are not modified by this method.
 *
 * @param feedColumnSpecs the feed table columns, looked up by name
 * @param columnSpecs     the columns to copy and adjust
 * @return a new array, in the same order as {@code columnSpecs}, containing the adjusted copies
 */
protected ColumnSpec[] adjustInvalidColumnSpec(ColumnSpec[] feedColumnSpecs, ColumnSpec[] columnSpecs) {
    // Index the feed columns by name so each lookup below is a single map access.
    final Map<String, ColumnSpec> feedSpecsByName = Arrays.stream(feedColumnSpecs)
        .collect(Collectors.toMap(ColumnSpec::getName, Function.identity()));

    final ColumnSpec[] adjusted = new ColumnSpec[columnSpecs.length];
    for (int i = 0; i < columnSpecs.length; i++) {
        final ColumnSpec copy = new ColumnSpec(columnSpecs[i]);
        final String feedColumnName = copy.getOtherColumnName();
        if (StringUtils.isNotBlank(feedColumnName)) {
            // toMap never stores null values, so a null result means "no such feed column".
            final ColumnSpec feedSpec = feedSpecsByName.get(feedColumnName);
            if (feedSpec != null) {
                copy.setDataType(feedSpec.getDataType());
            }
        }
        adjusted[i] = copy;
    }
    return adjusted;
}
Aggregations