Example 1 with ColumnSpec

Use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.

Class AbstractMergeTable, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String blockingValue = context.getProperty(BLOCKING_KEY).evaluateAttributeExpressions(flowFile).getValue();
    String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
    boolean block = false;
    if (blocking && blockingCache.putIfAbsent(blockingValue, flowFileId) != null) {
        if (StringUtils.isBlank(flowFile.getAttribute(BLOCKED_START_TIME))) {
            flowFile = session.putAttribute(flowFile, BLOCKED_START_TIME, String.valueOf(System.currentTimeMillis()));
            getLogger().info("Transferring Flow file {} to blocked relationship", new Object[] { flowFile });
        }
        // penalize the flow file and transfer to BLOCKED
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_BLOCKED);
        return;
    }
    // Add Blocking time to flow file if this was a blocked flowfile.
    if (blocking && StringUtils.isNotBlank(flowFile.getAttribute(BLOCKED_START_TIME))) {
        String blockedStartTime = flowFile.getAttribute(BLOCKED_START_TIME);
        try {
            Long l = Long.parseLong(blockedStartTime);
            Long blockTime = System.currentTimeMillis() - l;
            getLogger().info("Processing Blocked flow file {}.  This was blocked for {} ms", new Object[] { flowFile, blockTime });
            flowFile = session.putAttribute(flowFile, BLOCKED_TIME, String.valueOf(blockTime) + " ms");
        } catch (NumberFormatException e) {
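            // ignore an unparsable BLOCKED_START_TIME value; the BLOCKED_TIME attribute is simply not added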
        }
    }
    String PROVENANCE_EXECUTION_STATUS_KEY = context.getName() + " Execution Status";
    String partitionSpecString = context.getProperty(PARTITION_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue();
    String sourceSchema = context.getProperty(SOURCE_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    String sourceTable = context.getProperty(SOURCE_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    String targetSchema = context.getProperty(TARGET_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    String targetTable = context.getProperty(TARGET_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    String feedPartitionValue = context.getProperty(FEED_PARTITION).evaluateAttributeExpressions(flowFile).getValue();
    String mergeStrategyValue = context.getProperty(MERGE_STRATEGY).evaluateAttributeExpressions(flowFile).getValue();
    String hiveConfigurations = context.getProperty(HIVE_CONFIGURATIONS).evaluateAttributeExpressions(flowFile).getValue();
    boolean resetHive = context.getProperty(RESET_HIVE).asBoolean();
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).map(ColumnSpec::createFromString).orElse(new ColumnSpec[0]);
    if (STRATEGY_PK_MERGE.equals(mergeStrategyValue) && (columnSpecs == null || columnSpecs.length == 0)) {
        getLog().error("Missing required field specification for PK merge feature");
        flowFile = session.putAttribute(flowFile, PROVENANCE_EXECUTION_STATUS_KEY, "Failed: Missing required field specification for PK merge feature");
        release(blockingValue);
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    // Maintain default for backward compatibility
    if (StringUtils.isEmpty(mergeStrategyValue)) {
        mergeStrategyValue = STRATEGY_DEDUPE_MERGE;
    }
    logger.info("Merge strategy: " + mergeStrategyValue + " Using Source: " + sourceTable + " Target: " + targetTable + " feed partition:" + feedPartitionValue + " partSpec: " + partitionSpecString);
    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection conn = getConnection(context)) {
        TableMergeSyncSupport mergeSupport = new TableMergeSyncSupport(conn);
        if (resetHive) {
            mergeSupport.resetHiveConf();
        }
        mergeSupport.enableDynamicPartitions();
        if (StringUtils.isNotEmpty(hiveConfigurations)) {
            mergeSupport.setHiveConf(hiveConfigurations.split("\\|"));
        }
        PartitionSpec partitionSpec = new PartitionSpec(partitionSpecString);
        if (STRATEGY_DEDUPE_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, true);
        } else if (STRATEGY_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, false);
        } else if (STRATEGY_SYNC.equals(mergeStrategyValue)) {
            mergeSupport.doSync(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue);
        } else if (STRATEGY_ROLLING_SYNC.equals(mergeStrategyValue)) {
            mergeSupport.doRollingSync(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue);
        } else if (STRATEGY_PK_MERGE.equals(mergeStrategyValue)) {
            mergeSupport.doPKMerge(sourceSchema, sourceTable, targetSchema, targetTable, partitionSpec, feedPartitionValue, columnSpecs);
        } else {
            throw new UnsupportedOperationException("Failed to resolve the merge strategy");
        }
        session.getProvenanceReporter().modifyContent(flowFile, "Execution completed", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        flowFile = session.putAttribute(flowFile, PROVENANCE_EXECUTION_STATUS_KEY, "Successful");
        release(blockingValue);
        logger.info("Execution completed: " + stopWatch.getElapsed(TimeUnit.MILLISECONDS) + " Merge strategy: " + mergeStrategyValue + " Using Source: " + sourceTable + " Target: " + targetTable + " feed partition:" + feedPartitionValue + " partSpec: " + partitionSpecString);
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        logger.error("Unable to execute merge doMerge for {} due to {}; routing to failure", new Object[] { flowFile, e }, e);
        flowFile = session.putAttribute(flowFile, PROVENANCE_EXECUTION_STATUS_KEY, "Failed: " + e.getMessage());
        release(blockingValue);
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ColumnSpec(com.thinkbiganalytics.util.ColumnSpec) Connection(java.sql.Connection) ComponentLog(org.apache.nifi.logging.ComponentLog) PartitionSpec(com.thinkbiganalytics.util.PartitionSpec) ProcessException(org.apache.nifi.processor.exception.ProcessException) StopWatch(org.apache.nifi.util.StopWatch) StringUtils(org.apache.commons.lang3.StringUtils) TableMergeSyncSupport(com.thinkbiganalytics.ingest.TableMergeSyncSupport)
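
The same Optional-based idiom for turning the FIELD_SPECIFICATION property into a ColumnSpec[] recurs in the other processors below. A minimal standalone sketch of that idiom; the helper class and method names are illustrative and not part of the Kylo sources:

import java.util.Optional;

import org.apache.commons.lang3.StringUtils;

import com.thinkbiganalytics.util.ColumnSpec;

class FieldSpecParsing {

    // Resolves a raw field-specification value into a ColumnSpec[],
    // falling back to an empty array when the value is missing or blank.
    static ColumnSpec[] parseFieldSpecs(String rawFieldSpec) {
        return Optional.ofNullable(rawFieldSpec)
                .filter(StringUtils::isNotEmpty)
                .map(ColumnSpec::createFromString)
                .orElse(new ColumnSpec[0]);
    }
}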

Example 2 with ColumnSpec

Use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.

Class CreateElasticsearchBackedHiveTable, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String jarUrl = context.getProperty(JAR_URL).evaluateAttributeExpressions(flowFile).getValue();
    String useWan = context.getProperty(USE_WAN).getValue();
    String autoIndex = context.getProperty(AUTO_CREATE_INDEX).getValue();
    String idField = context.getProperty(ID_FIELD).evaluateAttributeExpressions(flowFile).getValue();
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).map(ColumnSpec::createFromString).orElse(new ColumnSpec[0]);
    validateArrayProperty(FIELD_SPECIFICATION.getDisplayName(), columnSpecs, session, flowFile);
    final String feedName = context.getProperty(IngestProperties.FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FEED_NAME.getDisplayName(), feedName, session, flowFile);
    final String categoryName = context.getProperty(IngestProperties.FEED_CATEGORY).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FEED_CATEGORY.getDisplayName(), categoryName, session, flowFile);
    final String nodes = context.getProperty(NODES).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(NODES.getDisplayName(), nodes, session, flowFile);
    final String indexString = context.getProperty(FIELD_INDEX_STRING).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FIELD_INDEX_STRING.getDisplayName(), indexString, session, flowFile);
    final String feedRoot = context.getProperty(FEED_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    validateStringProperty(FEED_ROOT.getDisplayName(), feedRoot, session, flowFile);
    List<String> hiveStatements = getHQLStatements(columnSpecs, nodes, feedRoot, feedName, categoryName, useWan, autoIndex, idField, jarUrl, indexString);
    final ThriftService thriftService = context.getProperty(THRIFT_SERVICE).asControllerService(ThriftService.class);
    executeStatements(context, session, flowFile, hiveStatements.toArray(new String[hiveStatements.size()]), thriftService);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ColumnSpec(com.thinkbiganalytics.util.ColumnSpec) ThriftService(com.thinkbiganalytics.nifi.v2.thrift.ThriftService) StringUtils(org.apache.commons.lang3.StringUtils)
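
The executeStatements call is not shown in this excerpt. A minimal sketch of what running the returned statements over a plain JDBC connection could look like, assuming the ThriftService hands back a standard java.sql.Connection as in the other examples; the class and method names are illustrative:

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;

class HqlRunner {

    // Runs each HQL statement in order, so an ADD JAR statement is applied
    // before the CREATE TABLE statement that depends on it.
    static void runAll(Connection conn, List<String> hiveStatements) throws SQLException {
        try (Statement stmt = conn.createStatement()) {
            for (String hql : hiveStatements) {
                stmt.execute(hql);
            }
        }
    }
}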

Example 3 with ColumnSpec

Use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.

Class CreateElasticsearchBackedHiveTable, method getHQLStatements:

public List<String> getHQLStatements(ColumnSpec[] columnSpecs, String nodes, String locationRoot, String feedName, String categoryName, String useWan, String autoIndex, String idField, String jarUrl, String indexFieldString) {
    final ColumnSpec[] partitions = {};
    TableType tableType = TableType.MASTER;
    List<String> indexFields = Arrays.asList(indexFieldString.toLowerCase().split(","));
    List<ColumnSpec> indexCols = Arrays.asList(columnSpecs).stream().filter(p -> indexFields.contains(p.getName().toLowerCase())).collect(Collectors.toList());
    String columnsSQL = tableType.deriveColumnSpecification(indexCols.toArray(new ColumnSpec[indexCols.size()]), partitions, "");
    String hql = generateHQL(columnsSQL, nodes, locationRoot, feedName, categoryName, useWan, autoIndex, idField);
    List<String> hiveStatements = new ArrayList<>();
    if (jarUrl != null && !jarUrl.isEmpty()) {
        String addJar = "ADD JAR " + jarUrl;
        hiveStatements.add(addJar);
    }
    hiveStatements.add(hql);
    return hiveStatements;
}
Also used : StandardValidators(org.apache.nifi.processor.util.StandardValidators) Arrays(java.util.Arrays) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Relationship(org.apache.nifi.processor.Relationship) FEED_NAME(com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FEED_NAME) Path(java.nio.file.Path) TableType(com.thinkbiganalytics.util.TableType) ExecuteHQLStatement(com.thinkbiganalytics.nifi.v2.thrift.ExecuteHQLStatement) FlowFile(org.apache.nifi.flowfile.FlowFile) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) ColumnSpec(com.thinkbiganalytics.util.ColumnSpec) Collectors(java.util.stream.Collectors) ThriftService(com.thinkbiganalytics.nifi.v2.thrift.ThriftService) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) List(java.util.List) FEED_CATEGORY(com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FEED_CATEGORY) FIELD_SPECIFICATION(com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FIELD_SPECIFICATION) Paths(java.nio.file.Paths) Optional(java.util.Optional) Tags(org.apache.nifi.annotation.documentation.Tags) Collections(java.util.Collections)
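
An illustrative call showing the argument order getHQLStatements expects. Every value below is a made-up placeholder, "processor" is assumed to be a CreateElasticsearchBackedHiveTable instance, and the pipe-delimited field-specification format passed to ColumnSpec.createFromString is an assumption rather than something confirmed by this excerpt:

// All values are placeholders; processor is assumed to be a CreateElasticsearchBackedHiveTable instance.
ColumnSpec[] specs = ColumnSpec.createFromString("id|bigint|Unique id\nname|string|Customer name");
List<String> statements = processor.getHQLStatements(
        specs,                                 // columnSpecs
        "es-node-1:9200",                      // nodes
        "/data/feeds",                         // locationRoot
        "customers",                           // feedName
        "retail",                              // categoryName
        "false",                               // useWan
        "true",                                // autoIndex
        "id",                                  // idField
        "/opt/jars/elasticsearch-hadoop.jar",  // jarUrl
        "id,name");                            // indexFieldString
// statements.get(0) would be the ADD JAR statement and statements.get(1) the CREATE TABLE HQL.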

Example 4 with ColumnSpec

Use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.

Class RegisterFeedTables, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // Verify flow file exists
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Verify properties and attributes
    final String feedFormatOptions = Optional.ofNullable(context.getProperty(FEED_FORMAT_SPECS).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).orElse(DEFAULT_FEED_FORMAT_OPTIONS);
    final String targetFormatOptions = Optional.ofNullable(context.getProperty(TARGET_FORMAT_SPECS).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).orElse(DEFAULT_STORAGE_FORMAT);
    final String feedTableProperties = context.getProperty(FEED_TBLPROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final String targetTableProperties = context.getProperty(TARGET_TBLPROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final ColumnSpec[] partitions = Optional.ofNullable(context.getProperty(PARTITION_SPECS).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).map(ColumnSpec::createFromString).orElse(new ColumnSpec[0]);
    final String tableType = context.getProperty(TABLE_TYPE).getValue();
    final String feedTableOverride = context.getProperty(FEED_TABLE_OVERRIDE).evaluateAttributeExpressions(flowFile).getValue();
    final ColumnSpec[] columnSpecs = Optional.ofNullable(context.getProperty(FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).map(ColumnSpec::createFromString).orElse(new ColumnSpec[0]);
    if (StringUtils.isEmpty(feedTableOverride) && (columnSpecs == null || columnSpecs.length == 0)) {
        getLog().error("Missing field specification");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    ColumnSpec[] feedColumnSpecs = Optional.ofNullable(context.getProperty(FEED_FIELD_SPECIFICATION).evaluateAttributeExpressions(flowFile).getValue()).filter(StringUtils::isNotEmpty).map(ColumnSpec::createFromString).orElse(new ColumnSpec[0]);
    if (feedColumnSpecs == null || feedColumnSpecs.length == 0) {
        // Backwards compatibility with older templates we set the source and target to the same
        feedColumnSpecs = columnSpecs;
    }
    final String entity = context.getProperty(IngestProperties.FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();
    if (entity == null || entity.isEmpty()) {
        getLog().error("Missing feed name");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    final String source = context.getProperty(IngestProperties.FEED_CATEGORY).evaluateAttributeExpressions(flowFile).getValue();
    if (source == null || source.isEmpty()) {
        getLog().error("Missing category name");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    final String feedRoot = context.getProperty(FEED_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    final String profileRoot = context.getProperty(PROFILE_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    final String masterRoot = context.getProperty(MASTER_ROOT).evaluateAttributeExpressions(flowFile).getValue();
    final TableRegisterConfiguration config = new TableRegisterConfiguration(feedRoot, profileRoot, masterRoot);
    // Register the tables
    final ThriftService thriftService = context.getProperty(THRIFT_SERVICE).asControllerService(ThriftService.class);
    try (final Connection conn = thriftService.getConnection()) {
        final TableRegisterSupport register = new TableRegisterSupport(conn, config);
        final boolean result;
        if (ALL_TABLES.equals(tableType)) {
            result = register.registerStandardTables(source, entity, feedColumnSpecs, feedFormatOptions, targetFormatOptions, partitions, columnSpecs, feedTableProperties, targetTableProperties, feedTableOverride);
        } else {
            result = register.registerTable(source, entity, feedColumnSpecs, feedFormatOptions, targetFormatOptions, partitions, columnSpecs, feedTableProperties, targetTableProperties, TableType.valueOf(tableType), true, feedTableOverride);
        }
        final Relationship relnResult = (result ? REL_SUCCESS : REL_FAILURE);
        session.transfer(flowFile, relnResult);
    } catch (final ProcessException | SQLException e) {
        getLog().error("Unable to obtain connection for {} due to {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ColumnSpec(com.thinkbiganalytics.util.ColumnSpec) SQLException(java.sql.SQLException) Connection(java.sql.Connection) TableRegisterConfiguration(com.thinkbiganalytics.util.TableRegisterConfiguration) ProcessException(org.apache.nifi.processor.exception.ProcessException) ThriftService(com.thinkbiganalytics.nifi.v2.thrift.ThriftService) StringUtils(org.apache.commons.lang3.StringUtils) TableRegisterSupport(com.thinkbiganalytics.ingest.TableRegisterSupport) Relationship(org.apache.nifi.processor.Relationship)
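
A hedged sketch of driving the same registration outside NiFi, assuming a Hive JDBC Connection (conn) and the two ColumnSpec arrays are already available. Paths, category, feed name, and format options are placeholders, and passing null for the TBLPROPERTIES and override arguments mirrors what the processor does when those properties are unset:

// conn, feedColumnSpecs, columnSpecs, and partitions are assumed to be in scope; all strings are placeholders.
TableRegisterConfiguration config = new TableRegisterConfiguration("/data/feeds", "/data/profile", "/data/master");
TableRegisterSupport register = new TableRegisterSupport(conn, config);
boolean registered = register.registerStandardTables(
        "retail",                                          // category (source)
        "customers",                                       // feed (entity)
        feedColumnSpecs,                                   // specs for the _feed table
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','",   // feed format options (placeholder)
        "STORED AS ORC",                                   // target format options (placeholder)
        partitions,                                        // partition ColumnSpec[]
        columnSpecs,                                       // specs for the target tables
        null,                                              // feed TBLPROPERTIES (unset)
        null,                                              // target TBLPROPERTIES (unset)
        null);                                             // feed table override (unset)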

Example 5 with ColumnSpec

Use of com.thinkbiganalytics.util.ColumnSpec in project kylo by Teradata.

Class TableRegisterSupport, method adjustInvalidColumnSpec:

/**
 * Copies the column specs and resets their data types to match those of the corresponding feed column specs.
 *
 * @param feedColumnSpecs the column specs of the source (_feed) table
 * @param columnSpecs     the target column specs to copy and adjust
 * @return copies of the target column specs with data types taken from the matching feed columns
 */
protected ColumnSpec[] adjustInvalidColumnSpec(ColumnSpec[] feedColumnSpecs, ColumnSpec[] columnSpecs) {
    // find the source data types from the _feed table that match these columns and replace the data types
    Map<String, ColumnSpec> feedColumnSpecMap = Arrays.asList(feedColumnSpecs).stream().collect(Collectors.toMap(ColumnSpec::getName, Function.identity()));
    List<ColumnSpec> invalidColumnSpecs = Arrays.asList(columnSpecs).stream().map(c -> {
        ColumnSpec copy = new ColumnSpec(c);
        if (StringUtils.isNotBlank(copy.getOtherColumnName()) && feedColumnSpecMap.containsKey(copy.getOtherColumnName())) {
            ColumnSpec feedSpec = feedColumnSpecMap.get(copy.getOtherColumnName());
            copy.setDataType(feedSpec.getDataType());
        }
        return copy;
    }).collect(Collectors.toList());
    return invalidColumnSpecs.toArray(new ColumnSpec[invalidColumnSpecs.size()]);
}
Also used : TableType(com.thinkbiganalytics.util.TableType) java.util(java.util) Logger(org.slf4j.Logger) Connection(java.sql.Connection) LoggerFactory(org.slf4j.LoggerFactory) ColumnSpec(com.thinkbiganalytics.util.ColumnSpec) StringUtils(org.apache.commons.lang3.StringUtils) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) HiveUtils(com.thinkbiganalytics.hive.util.HiveUtils) SQLException(java.sql.SQLException) Validate(org.apache.commons.lang3.Validate) ResultSet(java.sql.ResultSet) Statement(java.sql.Statement) Nonnull(javax.annotation.Nonnull) TableRegisterConfiguration(com.thinkbiganalytics.util.TableRegisterConfiguration)
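
One detail worth noting: Collectors.toMap throws an IllegalStateException if two feed column specs share the same name. If duplicate names were ever possible, a merge function would be needed; a hedged variant (not taken from the Kylo sources) that keeps the first spec seen:

// Keeps the first ColumnSpec seen for a given name instead of failing on duplicate keys.
Map<String, ColumnSpec> feedColumnSpecMap = Arrays.stream(feedColumnSpecs)
        .collect(Collectors.toMap(ColumnSpec::getName, Function.identity(), (first, second) -> first));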

Aggregations

ColumnSpec (com.thinkbiganalytics.util.ColumnSpec): 14 usages
Test (org.junit.Test): 9 usages
TableRegisterConfiguration (com.thinkbiganalytics.util.TableRegisterConfiguration): 5 usages
TableType (com.thinkbiganalytics.util.TableType): 5 usages
StringUtils (org.apache.commons.lang3.StringUtils): 5 usages
PartitionSpec (com.thinkbiganalytics.util.PartitionSpec): 4 usages
SQLException (java.sql.SQLException): 4 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 4 usages
HiveUtils (com.thinkbiganalytics.hive.util.HiveUtils): 3 usages
ThriftService (com.thinkbiganalytics.nifi.v2.thrift.ThriftService): 3 usages
Connection (java.sql.Connection): 3 usages
Arrays (java.util.Arrays): 3 usages
HashSet (java.util.HashSet): 3 usages
List (java.util.List): 3 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 3 usages
HiveShell (com.klarna.hiverunner.HiveShell): 2 usages
StandaloneHiveRunner (com.klarna.hiverunner.StandaloneHiveRunner): 2 usages
HiveProperties (com.klarna.hiverunner.annotations.HiveProperties): 2 usages
HiveRunnerSetup (com.klarna.hiverunner.annotations.HiveRunnerSetup): 2 usages
HiveSQL (com.klarna.hiverunner.annotations.HiveSQL): 2 usages