Search in sources :

Example 1 with Field

use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata.

the class DBSchemaParser method listColumns.

/**
 * Lists the columns of a table as {@link Field} objects.
 *
 * <p>The lookup is first made with {@code schema} passed as the JDBC schema pattern. If that
 * yields no columns, it is repeated with {@code schema} passed as the catalog instead
 * (for MySQL db).</p>
 *
 * @param conn      the connection whose metadata is queried
 * @param schema    the schema name, also tried as the catalog name on the second attempt
 * @param tableName the table name pattern
 * @return the fields built from the column metadata; possibly empty
 * @throws SQLException if the metadata cannot be read
 */
private List<Field> listColumns(Connection conn, String schema, String tableName) throws SQLException {
    final Set<String> primaryKeys = listPrimaryKeys(conn, schema, tableName);

    final List<Field> bySchemaFields;
    try (ResultSet bySchema = conn.getMetaData().getColumns(null, schema, tableName, null)) {
        bySchemaFields = columnsResultSetToField(bySchema, primaryKeys, schema);
    }
    if (!bySchemaFields.isEmpty()) {
        return bySchemaFields;
    }

    // nothing found by schema: retry using the schema as the catalog (for MySQL db)
    try (ResultSet byCatalog = conn.getMetaData().getColumns(schema, null, tableName, null)) {
        return columnsResultSetToField(byCatalog, primaryKeys, schema);
    }
}
Also used : DefaultField(com.thinkbiganalytics.discovery.model.DefaultField) Field(com.thinkbiganalytics.discovery.schema.Field) ResultSet(java.sql.ResultSet)

Example 2 with Field

use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata.

the class DBSchemaParser method columnsResultSetToField.

/**
 * Converts a column-metadata result set into a list of fields.
 *
 * <p>Rows whose {@code TABLE_CAT} is non-blank and differs (ignoring case) from a non-blank
 * {@code schema} are skipped. Every remaining row becomes one {@link DefaultField}.</p>
 *
 * @param columns the column metadata rows; may be {@code null}, in which case an empty list is returned
 * @param pkSet   the names of the primary key columns
 * @param schema  the schema name that catalog values are compared against
 * @return the fields, in result set order
 * @throws SQLException if a row cannot be read
 */
private List<Field> columnsResultSetToField(ResultSet columns, Set<String> pkSet, String schema) throws SQLException {
    final List<Field> result = new Vector<>();
    if (columns == null) {
        return result;
    }

    while (columns.next()) {
        final String catalog = columns.getString("TABLE_CAT");
        // A non-blank catalog means this db supports catalogs; only keep rows whose catalog is the supplied schema
        final boolean otherCatalog = StringUtils.isNotBlank(catalog) && StringUtils.isNotBlank(schema) && !schema.equalsIgnoreCase(catalog);
        if (otherCatalog) {
            continue;
        }

        final DefaultField column = new DefaultField();
        column.setName(columns.getString("COLUMN_NAME"));

        final Integer sqlType = columns.getInt("DATA_TYPE");
        column.setNativeDataType(ParserHelper.toNativeType(sqlType));
        column.setDerivedDataType(ParserHelper.sqlTypeToHiveType(sqlType));

        column.setDescription(columns.getString("REMARKS"));

        // Only an explicit "NO" changes the nullable flag; any other value leaves the field's default
        final String nullableFlag = columns.getString("IS_NULLABLE");
        if ("NO".equals(nullableFlag)) {
            column.setNullable(false);
        }

        if (pkSet.contains(column.getName())) {
            column.setPrimaryKey(true);
        }

        result.add(column);
    }
    return result;
}
Also used : DefaultField(com.thinkbiganalytics.discovery.model.DefaultField) Field(com.thinkbiganalytics.discovery.schema.Field) DefaultField(com.thinkbiganalytics.discovery.model.DefaultField) Vector(java.util.Vector)

Example 3 with Field

use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata.

the class FeedHiveTableService method updateColumnDescriptions.

/**
 * Updates the column descriptions in the Hive metastore for the specified feed.
 *
 * <p>Feed fields are matched to Hive fields by name, ignoring case. A column is changed only when
 * a matching Hive field exists, at least one of the two descriptions is non-empty, and the two
 * descriptions differ. Nothing happens when the feed has no fields or the Hive schema is not found.</p>
 *
 * @param feed the feed to update
 * @throws DataAccessException if there is any problem
 */
public void updateColumnDescriptions(@Nonnull final FeedMetadata feed) {
    final List<Field> feedFields = Optional.ofNullable(feed.getTable()).map(TableSetup::getTableSchema).map(TableSchema::getFields).orElse(null);
    if (feedFields == null || feedFields.isEmpty()) {
        return;
    }

    final TableSchema hiveSchema = hiveService.getTableSchema(feed.getSystemCategoryName(), feed.getSystemFeedName());
    if (hiveSchema == null) {
        return;
    }

    // Fold case with Locale.ROOT: the default-locale toLowerCase() maps "I" to a dotless i under
    // Turkish/Azeri locales, so names such as "ID" and "id" would fail to match each other.
    final Map<String, Field> hiveFieldMap = hiveSchema.getFields().stream()
        .collect(Collectors.toMap(field -> field.getName().toLowerCase(java.util.Locale.ROOT), Function.identity()));

    feedFields.stream()
        .filter(feedField -> {
            final Field hiveField = hiveFieldMap.get(feedField.getName().toLowerCase(java.util.Locale.ROOT));
            if (hiveField == null) {
                return false;
            }
            final String feedDescription = feedField.getDescription();
            final String hiveDescription = hiveField.getDescription();
            // Skip when both are null/empty (treated as equal) or when they are already identical
            return (StringUtils.isNotEmpty(feedDescription) || StringUtils.isNotEmpty(hiveDescription))
                   && !Objects.equals(feedDescription, hiveDescription);
        })
        .forEach(feedField -> changeColumn(feed, feedField.getName(), feedField));
}
Also used : DataAccessException(org.springframework.dao.DataAccessException) FeedMetadata(com.thinkbiganalytics.feedmgr.rest.model.FeedMetadata) StringUtils(org.apache.commons.lang3.StringUtils) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) HiveUtils(com.thinkbiganalytics.hive.util.HiveUtils) Objects(java.util.Objects) List(java.util.List) Field(com.thinkbiganalytics.discovery.schema.Field) HiveService(com.thinkbiganalytics.hive.service.HiveService) Map(java.util.Map) TableSchema(com.thinkbiganalytics.discovery.schema.TableSchema) Optional(java.util.Optional) TableSetup(com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup) Nonnull(javax.annotation.Nonnull) Field(com.thinkbiganalytics.discovery.schema.Field) TableSchema(com.thinkbiganalytics.discovery.schema.TableSchema) TableSetup(com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup)

Example 4 with Field

use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata.

the class FeedIT method assertBinaryColumnData.

/**
 * Asserts that the {@code cc} column of the feed's Hive table has the derived type
 * {@code binary} and that the row with {@code id = 1} holds the expected encoded value.
 *
 * @param feedName the feed (table) name inside the {@code functional_tests} schema
 */
private void assertBinaryColumnData(String feedName) {
    LOG.info("Asserting binary CC column data");

    // Schema check: the first field named "cc" must be derived as binary
    final DefaultHiveSchema hiveSchema = getHiveSchema("functional_tests", feedName);
    final Field ccColumn = hiveSchema.getFields()
        .stream()
        .filter(candidate -> candidate.getName().equals("cc"))
        .iterator()
        .next();
    Assert.assertEquals("binary", ccColumn.getDerivedDataType());

    // Data check: exactly one row comes back for id = 1
    final String query = "SELECT cc FROM " + "functional_tests" + "." + feedName + " where id = 1";
    final List<HashMap<String, String>> resultRows = getHiveQuery(query);
    Assert.assertEquals(1, resultRows.size());

    // TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09 is the double Base64 encoding of the first row's cc value
    // (6759521864920116): one encoding by our standardiser, a second by spring framework when
    // returning binary data
    final HashMap<String, String> firstRow = resultRows.get(0);
    Assert.assertEquals("TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09", firstRow.get("cc"));
}
Also used : PartitionField(com.thinkbiganalytics.feedmgr.rest.model.schema.PartitionField) Field(com.thinkbiganalytics.discovery.schema.Field) HashMap(java.util.HashMap) DefaultHiveSchema(com.thinkbiganalytics.discovery.model.DefaultHiveSchema)

Example 5 with Field

use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata.

the class CSVFileSchemaParser method populateSchema.

/**
 * Builds a file schema from the leading records of a CSV parser.
 *
 * <p>The first record defines the fields: their names are the record's values when
 * {@code headerRow} is set, otherwise generated names {@code Col_1}, {@code Col_2}, ...
 * Values of the following records (at most nine, since reading stops after ten records) are
 * appended to the sample values of the field in the same position. A value in a position
 * beyond the fields defined by the first record is not stored; a warning is logged instead.</p>
 *
 * @param parser the CSV parser to read records from
 * @return a schema containing the discovered fields and their sample values
 */
private DefaultFileSchema populateSchema(CSVParser parser) {
    DefaultFileSchema fileSchema = new DefaultFileSchema();
    int i = 0;
    ArrayList<Field> fields = new ArrayList<>();
    for (CSVRecord record : parser) {
        // only the first ten records are inspected
        if (i > 9) {
            break;
        }
        int size = record.size();
        for (int j = 0; j < size; j++) {
            if (i == 0) {
                // NOTE(review): when headerRow is false the first record's values are not added as samples - confirm this is intended
                DefaultField field = new DefaultField();
                if (headerRow) {
                    field.setName(record.get(j));
                } else {
                    field.setName("Col_" + (j + 1));
                }
                fields.add(field);
            } else if (j < fields.size()) {
                DefaultField field = (DefaultField) fields.get(j);
                field.getSampleValues().add(StringUtils.defaultString(record.get(j), ""));
            } else {
                // this record has more values than the first record defined fields for
                LOG.warn("Sample file has potential sparse column problem at row [{}] field [{}]", i + 1, j + 1);
            }
        }
        i++;
    }
    fileSchema.setFields(fields);
    return fileSchema;
}
Also used : DefaultField(com.thinkbiganalytics.discovery.model.DefaultField) Field(com.thinkbiganalytics.discovery.schema.Field) DefaultFileSchema(com.thinkbiganalytics.discovery.model.DefaultFileSchema) ArrayList(java.util.ArrayList) CSVRecord(org.apache.commons.csv.CSVRecord) DefaultField(com.thinkbiganalytics.discovery.model.DefaultField)

Aggregations

Field (com.thinkbiganalytics.discovery.schema.Field)11 DefaultField (com.thinkbiganalytics.discovery.model.DefaultField)6 ArrayList (java.util.ArrayList)3 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)2 DefaultHiveSchema (com.thinkbiganalytics.discovery.model.DefaultHiveSchema)2 FeedMetadata (com.thinkbiganalytics.feedmgr.rest.model.FeedMetadata)2 PartitionField (com.thinkbiganalytics.feedmgr.rest.model.schema.PartitionField)2 TableSetup (com.thinkbiganalytics.feedmgr.rest.model.schema.TableSetup)2 MetadataField (com.thinkbiganalytics.metadata.MetadataField)2 List (java.util.List)2 DefaultFileSchema (com.thinkbiganalytics.discovery.model.DefaultFileSchema)1 DefaultTableSchema (com.thinkbiganalytics.discovery.model.DefaultTableSchema)1 DefaultTag (com.thinkbiganalytics.discovery.model.DefaultTag)1 QueryResultColumn (com.thinkbiganalytics.discovery.schema.QueryResultColumn)1 Schema (com.thinkbiganalytics.discovery.schema.Schema)1 TableSchema (com.thinkbiganalytics.discovery.schema.TableSchema)1 Tag (com.thinkbiganalytics.discovery.schema.Tag)1 FeedSchedule (com.thinkbiganalytics.feedmgr.rest.model.FeedSchedule)1 FeedProcessingOptions (com.thinkbiganalytics.feedmgr.rest.model.schema.FeedProcessingOptions)1 TableOptions (com.thinkbiganalytics.feedmgr.rest.model.schema.TableOptions)1