Search in sources :

Example 1 with TableMergeSyncSupport

Use of com.thinkbiganalytics.ingest.TableMergeSyncSupport in the project kylo by Teradata.

From the class AbstractMergeTable, the method onTrigger.

/**
 * Merges or syncs a source table into a target table using the merge strategy configured on
 * this processor, then routes the flow file according to the outcome.
 *
 * <p>Processing steps:
 * <ol>
 *   <li>If blocking is enabled and the blocking cache already holds an entry for the evaluated
 *       blocking key, the flow file is stamped with a blocked-start time (first time only),
 *       penalized and transferred to {@code REL_BLOCKED}.</li>
 *   <li>If the flow file carries a blocked-start time, the elapsed blocked time is recorded
 *       on the flow file.</li>
 *   <li>The processor properties are evaluated against the flow file. A PK merge without a field
 *       specification is routed to failure immediately.</li>
 *   <li>An empty merge strategy falls back to the dedupe-and-merge strategy for backward
 *       compatibility.</li>
 *   <li>The selected operation is executed through {@link TableMergeSyncSupport}; the flow file
 *       goes to {@code REL_SUCCESS} on completion or to {@code REL_FAILURE} on any exception.</li>
 * </ol>
 * {@code release(blockingValue)} is invoked before every success or failure transfer.
 *
 * @param context the process context supplying the property values
 * @param session the process session supplying the flow file and used to route it
 * @throws ProcessException declared by the overridden method; merge errors are caught here and
 *                          routed to failure instead of being rethrown
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String blockingValue = context.getProperty(BLOCKING_KEY).evaluateAttributeExpressions(flowFile).getValue();
    final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
    // A non-null result from putIfAbsent means the key was already present in the cache,
    // so this flow file has to wait.
    if (blocking && blockingCache.putIfAbsent(blockingValue, flowFileId) != null) {
        if (StringUtils.isBlank(flowFile.getAttribute(BLOCKED_START_TIME))) {
            // Only stamp the start time the first time the flow file is blocked.
            flowFile = session.putAttribute(flowFile, BLOCKED_START_TIME, String.valueOf(System.currentTimeMillis()));
            getLogger().info("Transferring Flow file {} to blocked relationship", new Object[] { flowFile });
        }
        // penalize the flow file and transfer to BLOCKED
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_BLOCKED);
        return;
    }
    // Add Blocking time to flow file if this was a blocked flowfile.
    if (blocking && StringUtils.isNotBlank(flowFile.getAttribute(BLOCKED_START_TIME))) {
        final String blockedStartTime = flowFile.getAttribute(BLOCKED_START_TIME);
        try {
            final long blockTime = System.currentTimeMillis() - Long.parseLong(blockedStartTime);
            getLogger().info("Processing Blocked flow file {}.  This was blocked for {} ms", new Object[] { flowFile, blockTime });
            flowFile = session.putAttribute(flowFile, BLOCKED_TIME, blockTime + " ms");
        } catch (NumberFormatException e) {
            // The blocked time is informational only, so keep processing, but do not hide
            // the malformed attribute value.
            getLogger().warn("Unable to compute blocked time for flow file {}; invalid blocked start time '{}'", new Object[] { flowFile, blockedStartTime });
        }
    }
    final String provenanceExecutionStatusKey = context.getName() + " Execution Status";
    String partitionSpecString = context.getProperty(PARTITION_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue();
    String sourceSchema = context.getProperty(SOURCE_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    String sourceTable = context.getProperty(SOURCE_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    String targetSchema = context.getProperty(TARGET_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    String targetTable = context.getProperty(TARGET_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    String feedPartitionValue = context.getProperty(FEED_PARTITION).evaluateAttributeExpressions(flowFile).getValue();
    String mergeStrategyValue = context.getProperty(MERGE_STRATEGY).evaluateAttributeExpressions(flowFile).getValue();
    String hiveConfigurations = context.getProperty(HIVE_CONFIGURATIONS).evaluateAttributeExpressions(flowFile).getValue();
    boolean resetHive = context.getProperty(RESET_HIVE).asBoolean();
    // Never null: a missing or empty specification yields an empty array.
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).map(ColumnSpec::createFromString).orElse(new ColumnSpec[0]);
    if (STRATEGY_PK_MERGE.equals(mergeStrategyValue) && columnSpecs.length == 0) {
        getLog().error("Missing required field specification for PK merge feature");
        flowFile = session.putAttribute(flowFile, provenanceExecutionStatusKey, "Failed: Missing required field specification for PK merge feature");
        release(blockingValue);
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    // Maintain default for backward compatibility
    if (StringUtils.isEmpty(mergeStrategyValue)) {
        mergeStrategyValue = STRATEGY_DEDUPE_MERGE;
    }
    logger.info("Merge strategy: " + mergeStrategyValue + " Using Source: " + sourceTable + " Target: " + targetTable + " feed partition:" + feedPartitionValue + " partSpec: " + partitionSpecString);
    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection conn = getConnection(context)) {
        TableMergeSyncSupport mergeSupport = new TableMergeSyncSupport(conn);
        if (resetHive) {
            mergeSupport.resetHiveConf();
        }
        mergeSupport.enableDynamicPartitions();
        if (StringUtils.isNotEmpty(hiveConfigurations)) {
            // Individual Hive settings are separated by a literal '|'.
            mergeSupport.setHiveConf(hiveConfigurations.split("\\|"));
        }
        PartitionSpec partitionSpec = new PartitionSpec(partitionSpecString);
        if (STRATEGY_DEDUPE_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, true);
        } else if (STRATEGY_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, false);
        } else if (STRATEGY_SYNC.equals(mergeStrategyValue)) {
            mergeSupport.doSync(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue);
        } else if (STRATEGY_ROLLING_SYNC.equals(mergeStrategyValue)) {
            mergeSupport.doRollingSync(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue);
        } else if (STRATEGY_PK_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doPKMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, columnSpecs);
        } else {
            throw new UnsupportedOperationException("Failed to resolve the merge strategy");
        }
        session.getProvenanceReporter().modifyContent(flowFile, "Execution completed", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        flowFile = session.putAttribute(flowFile, provenanceExecutionStatusKey, "Successful");
        release(blockingValue);
        logger.info("Execution completed: " + stopWatch.getElapsed(TimeUnit.MILLISECONDS) + " Merge strategy: " + mergeStrategyValue + " Using Source: " + sourceTable + " Target: " + targetTable + " feed partition:" + feedPartitionValue + " partSpec: " + partitionSpecString);
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        // Boundary catch: any failure (connection, SQL, unsupported strategy) routes to failure.
        logger.error("Unable to execute merge doMerge for {} due to {}; routing to failure", new Object[] { flowFile, e }, e);
        flowFile = session.putAttribute(flowFile, provenanceExecutionStatusKey, "Failed: " + e.getMessage());
        release(blockingValue);
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ColumnSpec(com.thinkbiganalytics.util.ColumnSpec) Connection(java.sql.Connection) ComponentLog(org.apache.nifi.logging.ComponentLog) PartitionSpec(com.thinkbiganalytics.util.PartitionSpec) ProcessException(org.apache.nifi.processor.exception.ProcessException) StopWatch(org.apache.nifi.util.StopWatch) StringUtils(org.apache.commons.lang3.StringUtils) TableMergeSyncSupport(com.thinkbiganalytics.ingest.TableMergeSyncSupport)

Aggregations

TableMergeSyncSupport (com.thinkbiganalytics.ingest.TableMergeSyncSupport)1 ColumnSpec (com.thinkbiganalytics.util.ColumnSpec)1 PartitionSpec (com.thinkbiganalytics.util.PartitionSpec)1 Connection (java.sql.Connection)1 StringUtils (org.apache.commons.lang3.StringUtils)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 StopWatch (org.apache.nifi.util.StopWatch)1