Use of com.thinkbiganalytics.discovery.model.DefaultHiveSchema in project kylo by Teradata.
From class FeedIT, method assertBinaryColumnData:
private void assertBinaryColumnData(String feedName) {
    LOG.info("Asserting binary CC column data");
    DefaultHiveSchema schema = getHiveSchema("functional_tests", feedName);
    Field ccField = schema.getFields().stream().filter(field -> field.getName().equals("cc")).iterator().next();
    Assert.assertEquals("binary", ccField.getDerivedDataType());

    List<HashMap<String, String>> rows = getHiveQuery("SELECT cc FROM " + "functional_tests" + "." + feedName + " where id = 1");
    Assert.assertEquals(1, rows.size());
    HashMap<String, String> row = rows.get(0);

    // "TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09" is the double Base64 encoding of the cc value of the first row (6759521864920116):
    // the first Base64 encoding is applied by our standardiser, the second by the Spring framework when it returns binary data
    Assert.assertEquals("TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09", row.get("cc"));
}
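As a sanity check on the expected value, the double encoding can be reproduced with the JDK's java.util.Base64. This is a minimal standalone sketch, not part of the Kylo test suite; the class name DoubleBase64Check is made up for illustration.

import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class DoubleBase64Check {
    public static void main(String[] args) {
        // cc value of the first test row, as given in the comment above
        String cc = "6759521864920116";
        // first encoding, as applied by the Base64 standardiser before the value lands in Hive
        String once = Base64.getEncoder().encodeToString(cc.getBytes(StandardCharsets.UTF_8));
        // second encoding, as applied when the binary column is serialised in the query response
        String twice = Base64.getEncoder().encodeToString(once.getBytes(StandardCharsets.UTF_8));
        // prints TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09, matching the assertion in assertBinaryColumnData
        System.out.println(twice);
    }
}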
Use of com.thinkbiganalytics.discovery.model.DefaultHiveSchema in project kylo by Teradata.
From class CSVFileSchemaParser, method convertToTarget:
/**
 * Converts the raw file schema to the target schema with correctly derived types.
 *
 * @param target       the target schema type
 * @param sourceSchema the source schema
 * @return the converted schema
 */
protected Schema convertToTarget(TableSchemaType target, Schema sourceSchema) {
    Schema targetSchema;
    switch (target) {
        case RAW:
            targetSchema = sourceSchema;
            break;
        case HIVE:
            DefaultHiveSchema hiveSchema = new DefaultHiveSchema();
            BeanUtils.copyProperties(sourceSchema, hiveSchema);
            hiveSchema.setHiveFormat(deriveHiveRecordFormat());
            ParserHelper.deriveDataTypes(target, hiveSchema.getFields());
            targetSchema = hiveSchema;
            break;
        case RDBMS:
            DefaultTableSchema rdbmsSchema = new DefaultTableSchema();
            BeanUtils.copyProperties(sourceSchema, rdbmsSchema);
            ParserHelper.deriveDataTypes(target, rdbmsSchema.getFields());
            targetSchema = rdbmsSchema;
            break;
        default:
            throw new IllegalArgumentException(target.name() + " is not supported by this parser");
    }
    return targetSchema;
}
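The HIVE and RDBMS branches both follow the same copy-then-specialise pattern: Spring's BeanUtils.copyProperties fills in the properties shared with the source schema, and the target-specific properties and type derivation are applied afterwards. The standalone sketch below illustrates that pattern with two hypothetical beans; RawSchema and HiveSchemaBean are stand-ins invented for this example, not Kylo classes, and only org.springframework.beans.BeanUtils is assumed.

import org.springframework.beans.BeanUtils;

public class CopyThenSpecialize {

    // hypothetical stand-in for the source file schema bean
    public static class RawSchema {
        private String name;
        private String charset;
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public String getCharset() { return charset; }
        public void setCharset(String charset) { this.charset = charset; }
    }

    // hypothetical stand-in for DefaultHiveSchema: shares the source properties and adds a Hive-specific one
    public static class HiveSchemaBean extends RawSchema {
        private String hiveFormat;
        public String getHiveFormat() { return hiveFormat; }
        public void setHiveFormat(String hiveFormat) { this.hiveFormat = hiveFormat; }
    }

    public static void main(String[] args) {
        RawSchema source = new RawSchema();
        source.setName("userdata");
        source.setCharset("UTF-8");

        // copyProperties matches getter/setter pairs by name, so the shared fields carry over
        HiveSchemaBean hive = new HiveSchemaBean();
        BeanUtils.copyProperties(source, hive);

        // the target-specific property is set after the copy, mirroring setHiveFormat(deriveHiveRecordFormat())
        hive.setHiveFormat("ROW FORMAT DELIMITED FIELDS TERMINATED BY ','");

        System.out.println(hive.getName() + " / " + hive.getCharset() + " / " + hive.getHiveFormat());
    }
}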
Use of com.thinkbiganalytics.discovery.model.DefaultHiveSchema in project kylo by Teradata.
From class SparkFileSchemaParserService, method toHiveSchema:
private DefaultHiveSchema toHiveSchema(TransformQueryResult result, SparkFileType fileType) {
    DefaultHiveSchema schema = new DefaultHiveSchema();
    schema.setHiveFormat("STORED AS " + fileType);
    schema.setStructured(true);
    ArrayList<Field> fields = new ArrayList<>();
    List<? extends QueryResultColumn> columns = result.getColumns();
    for (int i = 0; i < columns.size(); ++i) {
        QueryResultColumn column = columns.get(i);
        DefaultField field = new DefaultField();
        field.setName(column.getDisplayName());
        field.setNativeDataType(column.getDataType());
        field.setDerivedDataType(column.getDataType());
        field.setDataTypeDescriptor(ParserHelper.hiveTypeToDescriptor(column.getDataType()));
        // strip the precision/scale from the data type and assign it to the field property
        setPrecisionAndScale(field);
        // add sample values for this column, taken from each returned row
        List<List<Object>> values = result.getRows();
        for (List<Object> colMap : values) {
            Object oVal = colMap.get(i);
            if (oVal != null) {
                field.getSampleValues().add(oVal.toString());
            }
        }
        fields.add(field);
    }
    schema.setFields(fields);
    return schema;
}
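The setPrecisionAndScale call is only described by its comment here, so the following is a hypothetical sketch of how a precision/scale pair such as the one in decimal(10,2) could be split off from a Hive type string. The PrecisionScaleSketch class and its regex are assumptions for illustration, not Kylo's actual implementation.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PrecisionScaleSketch {

    // matches types such as decimal(10,2), char(5) or varchar(255)
    private static final Pattern PRECISION_SCALE = Pattern.compile("([a-zA-Z]+)\\s*\\((\\d+)(?:\\s*,\\s*(\\d+))?\\)");

    public static void main(String[] args) {
        String dataType = "decimal(10,2)";
        Matcher m = PRECISION_SCALE.matcher(dataType);
        if (m.matches()) {
            String baseType = m.group(1);                   // "decimal"
            String precisionScale = m.group(3) != null
                ? m.group(2) + "," + m.group(3)             // "10,2"
                : m.group(2);                               // precision only, e.g. "255"
            System.out.println(baseType + " / precisionScale=" + precisionScale);
        }
    }
}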