Example 1 with TableType

Use of com.thinkbiganalytics.util.TableType in project kylo by Teradata.

The getHQLStatements method of the class CreateElasticsearchBackedHiveTable:

public List<String> getHQLStatements(ColumnSpec[] columnSpecs, String nodes, String locationRoot, String feedName, String categoryName, String useWan, String autoIndex, String idField, String jarUrl, String indexFieldString) {
    final ColumnSpec[] partitions = {};
    // The Elasticsearch-backed table is modeled as a MASTER table
    TableType tableType = TableType.MASTER;
    // Keep only the columns that should be indexed in Elasticsearch
    List<String> indexFields = Arrays.asList(indexFieldString.toLowerCase().split(","));
    List<ColumnSpec> indexCols = Arrays.stream(columnSpecs).filter(p -> indexFields.contains(p.getName().toLowerCase())).collect(Collectors.toList());
    String columnsSQL = tableType.deriveColumnSpecification(indexCols.toArray(new ColumnSpec[0]), partitions, "");
    String hql = generateHQL(columnsSQL, nodes, locationRoot, feedName, categoryName, useWan, autoIndex, idField);
    List<String> hiveStatements = new ArrayList<>();
    // Register the Elasticsearch storage-handler JAR before the DDL runs
    if (jarUrl != null && !jarUrl.isEmpty()) {
        hiveStatements.add("ADD JAR " + jarUrl);
    }
    hiveStatements.add(hql);
    return hiveStatements;
}
Also used: StandardValidators (org.apache.nifi.processor.util.StandardValidators), Arrays (java.util.Arrays), CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription), StringUtils (org.apache.commons.lang3.StringUtils), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), ProcessException (org.apache.nifi.processor.exception.ProcessException), ArrayList (java.util.ArrayList), HashSet (java.util.HashSet), Relationship (org.apache.nifi.processor.Relationship), FEED_NAME (com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FEED_NAME), Path (java.nio.file.Path), TableType (com.thinkbiganalytics.util.TableType), ExecuteHQLStatement (com.thinkbiganalytics.nifi.v2.thrift.ExecuteHQLStatement), FlowFile (org.apache.nifi.flowfile.FlowFile), ProcessContext (org.apache.nifi.processor.ProcessContext), Set (java.util.Set), ProcessSession (org.apache.nifi.processor.ProcessSession), ColumnSpec (com.thinkbiganalytics.util.ColumnSpec), Collectors (java.util.stream.Collectors), ThriftService (com.thinkbiganalytics.nifi.v2.thrift.ThriftService), InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement), List (java.util.List), FEED_CATEGORY (com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FEED_CATEGORY), FIELD_SPECIFICATION (com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FIELD_SPECIFICATION), Paths (java.nio.file.Paths), Optional (java.util.Optional), Tags (org.apache.nifi.annotation.documentation.Tags), Collections (java.util.Collections)
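
For orientation, here is a minimal sketch of how this method might be driven. Every literal below (node address, paths, field list, JAR path) is a hypothetical placeholder; only the method signature and ColumnSpec.createFromString come from the snippets on this page.

// Sketch only: all argument values are hypothetical placeholders.
ColumnSpec[] columnSpecs = ColumnSpec.createFromString("id|bigint|my comment\nname|string\nemail|string");
CreateElasticsearchBackedHiveTable processor = new CreateElasticsearchBackedHiveTable();
List<String> statements = processor.getHQLStatements(
    columnSpecs,
    "localhost:9200",               // Elasticsearch nodes
    "/model.db",                    // location root
    "employee",                     // feed name
    "bar",                          // category name
    "false",                        // use WAN
    "true",                         // auto index
    "id",                           // id field
    "/opt/jars/es-hadoop-hive.jar", // storage-handler JAR (hypothetical path)
    "id,email");                    // comma-separated fields to index
// statements: ["ADD JAR /opt/jars/es-hadoop-hive.jar", <generated CREATE TABLE HQL>]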

Example 2 with TableType

Use of com.thinkbiganalytics.util.TableType in project kylo by Teradata.

The onTrigger method of the class DropFeedTables:

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session) throws ProcessException {
    // Verify flow file exists
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Verify properties and attributes
    String additionalTablesValue = context.getProperty(ADDITIONAL_TABLES).evaluateAttributeExpressions(flowFile).getValue();
    Set<String> additionalTables = (additionalTablesValue != null) ? ImmutableSet.copyOf(additionalTablesValue.split(",")) : ImmutableSet.of();
    String entity = context.getProperty(IngestProperties.FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();
    if (entity == null || entity.isEmpty()) {
        getLog().error("Missing feed name");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    String source = context.getProperty(IngestProperties.FEED_CATEGORY).evaluateAttributeExpressions(flowFile).getValue();
    if (source == null || source.isEmpty()) {
        getLog().error("Missing category name");
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
        return;
    }
    Set<TableType> tableTypes;
    String tableTypesValue = context.getProperty(TABLE_TYPE).getValue();
    if (ALL_TABLES.equals(tableTypesValue)) {
        tableTypes = ImmutableSet.copyOf(TableType.values());
    } else {
        tableTypes = ImmutableSet.of(TableType.valueOf(tableTypesValue));
    }
    // Drop the tables
    final ThriftService thriftService = context.getProperty(IngestProperties.THRIFT_SERVICE).asControllerService(ThriftService.class);
    try (final Connection conn = thriftService.getConnection()) {
        boolean result = new TableRegisterSupport(conn).dropTables(source, entity, tableTypes, additionalTables);
        session.transfer(flowFile, result ? IngestProperties.REL_SUCCESS : IngestProperties.REL_FAILURE);
    } catch (final Exception e) {
        getLog().error("Unable drop tables", e);
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), TableType (com.thinkbiganalytics.util.TableType), ThriftService (com.thinkbiganalytics.nifi.v2.thrift.ThriftService), TableRegisterSupport (com.thinkbiganalytics.ingest.TableRegisterSupport), Connection (java.sql.Connection), ProcessException (org.apache.nifi.processor.exception.ProcessException)
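
The only TableType-specific logic above is the resolution of the TABLE_TYPE property. Here is a minimal sketch of that branch in isolation; "ALL" stands in for the ALL_TABLES constant, whose actual value is not shown on this page.

// Sketch only: "ALL" is an assumed sentinel for the ALL_TABLES constant.
static Set<TableType> resolveTableTypes(String tableTypesValue) {
    if ("ALL".equals(tableTypesValue)) {
        return ImmutableSet.copyOf(TableType.values());         // drop every table type
    }
    return ImmutableSet.of(TableType.valueOf(tableTypesValue)); // e.g. "MASTER" -> TableType.MASTER
}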

Example 3 with TableType

Use of com.thinkbiganalytics.util.TableType in project kylo by Teradata.

The testTableCreateS3 method of the class TableRegisterSupportTest:

@Test
public void testTableCreateS3() {
    ColumnSpec[] specs = ColumnSpec.createFromString("id|bigint|my comment\nname|string\ncompany|string|some description\nzip|string\nphone|string\nemail|string\ncountry|string\nhired|date");
    ColumnSpec[] parts = ColumnSpec.createFromString("year|int\ncountry|string");
    TableRegisterConfiguration conf = new TableRegisterConfiguration("s3a://testBucket/model.db/", "s3a://testBucket/model.db/", "s3a://testBucket/app/warehouse/");
    TableRegisterSupport support = new TableRegisterSupport(connection, conf);
    TableType[] tableTypes = new TableType[] { TableType.FEED, TableType.INVALID, TableType.VALID, TableType.MASTER };
    for (TableType tableType : tableTypes) {
        String ddl = support.createDDL("bar", "employee", specs, parts, "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'", "stored as orc", "tblproperties (\"orc.compress\"=\"SNAPPY\")", tableType);
        String location = StringUtils.substringBetween(ddl, "LOCATION '", "'");
        // MASTER tables live under the master/warehouse root; FEED, VALID and INVALID under the model.db root
        if (tableType == TableType.MASTER) {
            assertEquals("Master location does not match", "s3a://testBucket/app/warehouse/bar/employee", location);
        } else {
            assertEquals("Locations do not match", "s3a://testBucket/model.db/bar/employee/" + tableType.toString().toLowerCase(), location);
        }
    }
}
Also used: ColumnSpec (com.thinkbiganalytics.util.ColumnSpec), TableType (com.thinkbiganalytics.util.TableType), TableRegisterConfiguration (com.thinkbiganalytics.util.TableRegisterConfiguration), Test (org.junit.Test)
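
The assertions encode a simple layout rule. Here it is as a helper method; this is inferred from the test above, not from Kylo's documentation, and the helper itself is hypothetical.

// Inferred layout: MASTER under the master root, all other types under the model.db root.
static String expectedLocation(TableType type, String category, String table) {
    if (type == TableType.MASTER) {
        return "s3a://testBucket/app/warehouse/" + category + "/" + table;
    }
    return "s3a://testBucket/model.db/" + category + "/" + table + "/" + type.toString().toLowerCase();
}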

Example 4 with TableType

Use of com.thinkbiganalytics.util.TableType in project kylo by Teradata.

The testTableCreate method of the class TableRegisterSupportTest:

@Test
public void testTableCreate() {
    ColumnSpec[] specs = ColumnSpec.createFromString("id|bigint|my comment\nname|string\ncompany|string|some description\nzip|string\nphone|string\nemail|string\ncountry|string\nhired|date");
    ColumnSpec[] parts = ColumnSpec.createFromString("year|int\ncountry|string");
    TableRegisterSupport support = new TableRegisterSupport(connection);
    TableType[] tableTypes = new TableType[] { TableType.FEED, TableType.INVALID, TableType.VALID, TableType.MASTER };
    for (TableType tableType : tableTypes) {
        String ddl = support.createDDL("bar", "employee", specs, parts, "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'", "stored as orc", "tblproperties (\"orc.compress\"=\"SNAPPY\")", tableType);
        // Hack to make a legal file root
        ddl = ddl.replace("LOCATION '", "LOCATION '${hiveconf:MY.HDFS.DIR}");
        hiveShell.execute(ddl);
    }
}
Also used: ColumnSpec (com.thinkbiganalytics.util.ColumnSpec), TableType (com.thinkbiganalytics.util.TableType), Test (org.junit.Test)
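
The LOCATION rewrite is what makes the generated DDL runnable inside the embedded Hive used by this test; hiveShell is assumed to be an embedded Hive fixture that defines the MY.HDFS.DIR hiveconf variable. A minimal illustration of the substitution:

// Hive expands ${hiveconf:MY.HDFS.DIR} at execution time, redirecting the table
// into a writable local scratch directory instead of the original warehouse path.
String ddl = "CREATE TABLE t (id BIGINT) LOCATION '/app/warehouse/bar/employee'";
String portable = ddl.replace("LOCATION '", "LOCATION '${hiveconf:MY.HDFS.DIR}");
// portable: CREATE TABLE t (id BIGINT) LOCATION '${hiveconf:MY.HDFS.DIR}/app/warehouse/bar/employee'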

Example 5 with TableType

Use of com.thinkbiganalytics.util.TableType in project kylo by Teradata.

The testRemovingColumns method of the class TableRegisterSupportTest:

@Test
public void testRemovingColumns() {
    ColumnSpec[] feedSpecs = ColumnSpec.createFromString("id|string|my comment|0|0|0|id\n" + "name|string||0|0|0|name\n" + "company|string|some description|0|0|0|change_company\n" + "zip|string||0|0|0|zip_code\n" + "phone|string\n" + "email|string\n" + "country|string\n" + "hired|string");
    ColumnSpec[] targetSpecs = ColumnSpec.createFromString("id|bigint|my comment|0|0|0|id\n" + "name|string||0|0|0|name\n" + "change_company|string|some description|0|0|0|company\n" + "zip_code|string||0|0|0|zip\n" + "email|string\n" + "hired|date||0|0|0|hired");
    ColumnSpec[] parts = ColumnSpec.createFromString("year|int\ncountry|string");
    TableRegisterConfiguration conf = new TableRegisterConfiguration();
    TableRegisterSupport support = new TableRegisterSupport(connection, conf);
    ColumnSpec[] invalidColumnSpecs = support.adjustInvalidColumnSpec(feedSpecs, targetSpecs);
    assertEquals(targetSpecs.length, invalidColumnSpecs.length);
    Map<String, ColumnSpec> feedColumnSpecMap = Arrays.asList(feedSpecs).stream().collect(Collectors.toMap(ColumnSpec::getName, Function.identity()));
    for (ColumnSpec invalid : invalidColumnSpecs) {
        if (StringUtils.isNotBlank(invalid.getOtherColumnName())) {
            assertEquals(invalid.getDataType(), feedColumnSpecMap.get(invalid.getOtherColumnName()).getDataType());
        }
    }
    TableType[] tableTypes = new TableType[] { TableType.FEED, TableType.INVALID, TableType.VALID, TableType.MASTER };
    for (TableType tableType : tableTypes) {
        // FEED keeps the source columns, INVALID the adjusted columns,
        // and VALID/MASTER the target columns
        ColumnSpec[] useColumnSpecs = targetSpecs;
        if (tableType == TableType.INVALID) {
            useColumnSpecs = invalidColumnSpecs;
        } else if (tableType == TableType.FEED) {
            useColumnSpecs = feedSpecs;
        }
        // No assertion on the generated DDL; the call merely must not throw
        String ddl = support.createDDL("source_table", "target_table", useColumnSpecs, parts, "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'", "stored as orc", "tblproperties (\"orc.compress\"=\"SNAPPY\")", tableType);
    }
}
Also used: ColumnSpec (com.thinkbiganalytics.util.ColumnSpec), TableType (com.thinkbiganalytics.util.TableType), TableRegisterConfiguration (com.thinkbiganalytics.util.TableRegisterConfiguration), Test (org.junit.Test)
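
The pipe-delimited lines above appear to encode name|dataType|comment, three numeric flags, and an optional linked column name. A minimal sketch of the rename relationship the test exercises; the field semantics are inferred from the snippets, not from documentation.

// "company" in the feed is renamed to "change_company" in the target;
// getOtherColumnName() appears to carry the name of the column on the other side.
ColumnSpec[] feed   = ColumnSpec.createFromString("company|string|some description|0|0|0|change_company");
ColumnSpec[] target = ColumnSpec.createFromString("change_company|string|some description|0|0|0|company");
// The type-consistency assertion above checks that target[0].getDataType()
// equals the data type of the feed column named by target[0].getOtherColumnName().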

Aggregations

TableType (com.thinkbiganalytics.util.TableType): 6 usages
ColumnSpec (com.thinkbiganalytics.util.ColumnSpec): 4 usages
Test (org.junit.Test): 3 usages
ThriftService (com.thinkbiganalytics.nifi.v2.thrift.ThriftService): 2 usages
TableRegisterConfiguration (com.thinkbiganalytics.util.TableRegisterConfiguration): 2 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 2 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 2 usages
TableRegisterSupport (com.thinkbiganalytics.ingest.TableRegisterSupport): 1 usage
FEED_CATEGORY (com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FEED_CATEGORY): 1 usage
FEED_NAME (com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FEED_NAME): 1 usage
FIELD_SPECIFICATION (com.thinkbiganalytics.nifi.v2.ingest.IngestProperties.FIELD_SPECIFICATION): 1 usage
ExecuteHQLStatement (com.thinkbiganalytics.nifi.v2.thrift.ExecuteHQLStatement): 1 usage
Path (java.nio.file.Path): 1 usage
Paths (java.nio.file.Paths): 1 usage
Connection (java.sql.Connection): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Arrays (java.util.Arrays): 1 usage
Collections (java.util.Collections): 1 usage
HashSet (java.util.HashSet): 1 usage
List (java.util.List): 1 usage