Use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata:
class DBSchemaParser, method listColumns.
/**
 * Lists the columns of the given table as {@link Field} objects.
 * <p>
 * First queries JDBC metadata using {@code schema} as the schema name; if nothing is
 * found, retries using {@code schema} as the catalog name instead, since some databases
 * (notably MySQL) expose databases as catalogs rather than schemas.
 *
 * @param conn      the active JDBC connection
 * @param schema    the schema (or catalog, for MySQL) containing the table
 * @param tableName the table whose columns are listed
 * @return the discovered fields; empty if the table has no visible columns
 * @throws SQLException if the metadata query fails
 */
private List<Field> listColumns(Connection conn, String schema, String tableName) throws SQLException {
    final Set<String> primaryKeys = listPrimaryKeys(conn, schema, tableName);
    List<Field> result;
    try (ResultSet rs = conn.getMetaData().getColumns(null, schema, tableName, null)) {
        result = columnsResultSetToField(rs, primaryKeys, schema);
    }
    if (result.isEmpty()) {
        // MySQL reports databases as catalogs, so retry with schema in the catalog slot.
        try (ResultSet rs = conn.getMetaData().getColumns(schema, null, tableName, null)) {
            result = columnsResultSetToField(rs, primaryKeys, schema);
        }
    }
    return result;
}
Use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata:
class DBSchemaParser, method columnsResultSetToField.
/**
 * Converts a JDBC column-metadata result set into a list of {@link Field} objects.
 * <p>
 * When the database supports catalogs (TABLE_CAT is non-blank), rows whose catalog does
 * not match the supplied schema are skipped. Primary-key and nullability flags are set
 * from {@code pkSet} and the IS_NULLABLE metadata column respectively.
 *
 * @param columns result set from {@link java.sql.DatabaseMetaData#getColumns}; may be null
 * @param pkSet   names of the table's primary-key columns
 * @param schema  the schema (or catalog) the caller asked for; may be blank
 * @return the fields described by the result set; never null
 * @throws SQLException if reading the result set fails
 */
private List<Field> columnsResultSetToField(ResultSet columns, Set<String> pkSet, String schema) throws SQLException {
    // ArrayList instead of the legacy synchronized Vector: this list is confined to one
    // thread, so Vector's per-call locking is pure overhead. (Fully qualified to avoid
    // depending on an ArrayList import.)
    List<Field> fields = new java.util.ArrayList<>();
    if (columns != null) {
        while (columns.next()) {
            String cat = columns.getString("TABLE_CAT");
            if (StringUtils.isNotBlank(cat) && StringUtils.isNotBlank(schema)) {
                // This db supports catalogs. Ensure the catalog matches the supplied schema.
                if (!schema.equalsIgnoreCase(cat)) {
                    continue;
                }
            }
            DefaultField field = new DefaultField();
            field.setName(columns.getString("COLUMN_NAME"));
            // getInt returns a primitive; the previous boxed Integer was needless autoboxing.
            int dataType = columns.getInt("DATA_TYPE"); // a java.sql.Types constant
            field.setNativeDataType(ParserHelper.toNativeType(dataType));
            field.setDerivedDataType(ParserHelper.sqlTypeToHiveType(dataType));
            field.setDescription(columns.getString("REMARKS"));
            // JDBC reports IS_NULLABLE as "YES", "NO", or "" (unknown); only an explicit
            // "NO" marks the field non-nullable.
            String isNullableString = columns.getString("IS_NULLABLE");
            if ("NO".equals(isNullableString)) {
                field.setNullable(false);
            }
            if (pkSet.contains(field.getName())) {
                field.setPrimaryKey(true);
            }
            fields.add(field);
        }
    }
    return fields;
}
Use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata:
class FeedHiveTableService, method updateColumnDescriptions.
/**
 * Updates the column descriptions in the Hive metastore for the specified feed.
 *
 * @param feed the feed to update
 * @throws DataAccessException if there is any problem
 */
public void updateColumnDescriptions(@Nonnull final FeedMetadata feed) {
    final List<Field> feedFields = Optional.ofNullable(feed.getTable())
        .map(TableSetup::getTableSchema)
        .map(TableSchema::getFields)
        .orElse(null);
    if (feedFields == null || feedFields.isEmpty()) {
        return;
    }
    final TableSchema hiveSchema = hiveService.getTableSchema(feed.getSystemCategoryName(), feed.getSystemFeedName());
    if (hiveSchema == null) {
        return;
    }
    // Index Hive columns by lower-cased name for case-insensitive matching against feed fields.
    final Map<String, Field> hiveFieldMap =
        hiveSchema.getFields().stream().collect(Collectors.toMap(field -> field.getName().toLowerCase(), Function.identity()));
    for (final Field feedField : feedFields) {
        final Field hiveField = hiveFieldMap.get(feedField.getName().toLowerCase());
        // Only push an update when the Hive column exists, at least one side has a
        // description, and the two descriptions actually differ.
        final boolean anyDescription = hiveField != null
                                       && (StringUtils.isNotEmpty(feedField.getDescription()) || StringUtils.isNotEmpty(hiveField.getDescription()));
        if (anyDescription && !Objects.equals(feedField.getDescription(), hiveField.getDescription())) {
            changeColumn(feed, feedField.getName(), feedField);
        }
    }
}
Use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata:
class FeedIT, method assertBinaryColumnData.
/**
 * Verifies that the "cc" column of the feed's Hive table was derived as binary and that
 * the row with id = 1 holds the expected double-Base64-encoded credit-card value.
 */
private void assertBinaryColumnData(String feedName) {
    LOG.info("Asserting binary CC column data");

    // The derived type of the "cc" column must be binary; fails fast if the column is absent.
    DefaultHiveSchema schema = getHiveSchema("functional_tests", feedName);
    Field ccField = schema.getFields().stream()
        .filter(field -> field.getName().equals("cc"))
        .findFirst()
        .get();
    Assert.assertEquals("binary", ccField.getDerivedDataType());

    List<HashMap<String, String>> rows =
        getHiveQuery("SELECT cc FROM functional_tests." + feedName + " where id = 1");
    Assert.assertEquals(1, rows.size());
    // TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09 is the double Base64 encoding of the cc value of the
    // first row (6759521864920116): once by our standardiser, and once more by the Spring
    // framework when returning binary data.
    Assert.assertEquals("TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09", rows.get(0).get("cc"));
}
Use of com.thinkbiganalytics.discovery.schema.Field in project kylo by Teradata:
class CSVFileSchemaParser, method populateSchema.
/**
 * Builds a {@link DefaultFileSchema} by sampling up to the first 10 records of the parsed CSV.
 * <p>
 * The first record establishes the field list: names come from the record itself when
 * {@code headerRow} is set, otherwise synthetic 1-based names ("Col_1", "Col_2", ...) are
 * generated. Each subsequent sampled record contributes one sample value per field.
 *
 * @param parser the CSV parser positioned at the start of the file
 * @return the populated file schema
 */
private DefaultFileSchema populateSchema(CSVParser parser) {
    DefaultFileSchema fileSchema = new DefaultFileSchema();
    int rowIndex = 0;
    ArrayList<Field> fields = new ArrayList<>();
    for (CSVRecord record : parser) {
        // Sample at most the first 10 rows; that is enough to infer names and examples.
        if (rowIndex > 9) {
            break;
        }
        int size = record.size();
        for (int col = 0; col < size; col++) {
            if (rowIndex == 0) {
                DefaultField field = new DefaultField();
                if (headerRow) {
                    field.setName(record.get(col));
                } else {
                    // No header row: generate synthetic, 1-based column names.
                    field.setName("Col_" + (col + 1));
                }
                fields.add(field);
            } else {
                try {
                    DefaultField field = (DefaultField) fields.get(col);
                    field.getSampleValues().add(StringUtils.defaultString(record.get(col), ""));
                } catch (IndexOutOfBoundsException e) {
                    // Row has more columns than the first record; log it and keep sampling.
                    // Fix: SLF4J substitutes "{}" placeholders, not "[?]" — previously the
                    // row/field numbers were never rendered into the message.
                    LOG.warn("Sample file has potential sparse column problem at row [{}] field [{}]", rowIndex + 1, col + 1);
                }
            }
        }
        rowIndex++;
    }
    fileSchema.setFields(fields);
    return fileSchema;
}
Aggregations