
Example 1 with DefaultHiveSchema

Use of com.thinkbiganalytics.discovery.model.DefaultHiveSchema in project kylo by Teradata.

From the class FeedIT, method assertBinaryColumnData:

private void assertBinaryColumnData(String feedName) {
    LOG.info("Asserting binary CC column data");
    DefaultHiveSchema schema = getHiveSchema("functional_tests", feedName);
    Field ccField = schema.getFields().stream().filter(field -> field.getName().equals("cc")).findFirst().get();
    Assert.assertEquals("binary", ccField.getDerivedDataType());
    List<HashMap<String, String>> rows = getHiveQuery("SELECT cc FROM functional_tests." + feedName + " WHERE id = 1");
    Assert.assertEquals(1, rows.size());
    HashMap<String, String> row = rows.get(0);
    // "TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09" is the double Base64 encoding of the first row's cc value (6759521864920116):
    // encoded once by our standardiser and a second time by the Spring framework when returning binary data (see the sketch after this example)
    Assert.assertEquals("TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09", row.get("cc"));
}
Also used: PartitionField(com.thinkbiganalytics.feedmgr.rest.model.schema.PartitionField) Field(com.thinkbiganalytics.discovery.schema.Field) HashMap(java.util.HashMap) DefaultHiveSchema(com.thinkbiganalytics.discovery.model.DefaultHiveSchema)
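The expected value in that assertion can be reproduced outside the test. A minimal, self-contained sketch (the class name DoubleBase64Demo is ours, not Kylo's) that applies the same two Base64 passes to the card number:

import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class DoubleBase64Demo {

    public static void main(String[] args) {
        String cc = "6759521864920116";
        // First pass: what the Base64 standardiser writes into the Hive table
        String once = Base64.getEncoder().encodeToString(cc.getBytes(StandardCharsets.UTF_8));
        // Second pass: what Spring applies when serializing the binary column in the REST response
        String twice = Base64.getEncoder().encodeToString(once.getBytes(StandardCharsets.UTF_8));
        System.out.println(once);   // Njc1OTUyMTg2NDkyMDExNg==
        System.out.println(twice);  // TmpjMU9UVXlNVGcyTkRreU1ERXhOZz09
    }
}

Running it prints the singly and doubly encoded values; the second line matches the string the test asserts.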

Example 2 with DefaultHiveSchema

Use of com.thinkbiganalytics.discovery.model.DefaultHiveSchema in project kylo by Teradata.

From the class CSVFileSchemaParser, method convertToTarget:

/**
 * Converts the raw file schema to the target schema with correctly derived types.
 *
 * @param target       the target schema type
 * @param sourceSchema the source schema
 * @return the converted schema
 */
protected Schema convertToTarget(TableSchemaType target, Schema sourceSchema) {
    Schema targetSchema;
    switch(target) {
        case RAW:
            targetSchema = sourceSchema;
            break;
        case HIVE:
            DefaultHiveSchema hiveSchema = new DefaultHiveSchema();
            BeanUtils.copyProperties(sourceSchema, hiveSchema);
            hiveSchema.setHiveFormat(deriveHiveRecordFormat());
            ParserHelper.deriveDataTypes(target, hiveSchema.getFields());
            targetSchema = hiveSchema;
            break;
        case RDBMS:
            DefaultTableSchema rdbmsSchema = new DefaultTableSchema();
            BeanUtils.copyProperties(sourceSchema, rdbmsSchema);
            ParserHelper.deriveDataTypes(target, rdbmsSchema.getFields());
            targetSchema = rdbmsSchema;
            break;
        default:
            throw new IllegalArgumentException(target.name() + " is not supported by this parser");
    }
    return targetSchema;
}
Also used: DefaultFileSchema(com.thinkbiganalytics.discovery.model.DefaultFileSchema) Schema(com.thinkbiganalytics.discovery.schema.Schema) DefaultHiveSchema(com.thinkbiganalytics.discovery.model.DefaultHiveSchema) DefaultTableSchema(com.thinkbiganalytics.discovery.model.DefaultTableSchema)
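The method follows a copy-then-specialize shape: shared metadata is bean-copied onto a target-specific subtype, and only the type derivation differs per target. A minimal, self-contained sketch of that shape with stand-in types (Target, SchemaModel, HiveModel, and RdbmsModel are illustrative, not the Kylo classes, and "TEXTFILE" merely stands in for whatever deriveHiveRecordFormat() returns):

import java.util.List;

public class ConvertToTargetSketch {

    enum Target { RAW, HIVE, RDBMS }

    static class SchemaModel {
        String name;
        List<String> fieldNames;
    }

    static class HiveModel extends SchemaModel {
        String hiveFormat;
    }

    static class RdbmsModel extends SchemaModel {
    }

    static SchemaModel convertToTarget(Target target, SchemaModel source) {
        switch (target) {
            case RAW:
                // the raw schema is passed through untouched
                return source;
            case HIVE:
                HiveModel hive = new HiveModel();
                copyShared(source, hive);       // plays the role of BeanUtils.copyProperties
                hive.hiveFormat = "TEXTFILE";   // placeholder for deriveHiveRecordFormat()
                return hive;
            case RDBMS:
                RdbmsModel rdbms = new RdbmsModel();
                copyShared(source, rdbms);
                return rdbms;
            default:
                throw new IllegalArgumentException(target.name() + " is not supported by this parser");
        }
    }

    static void copyShared(SchemaModel from, SchemaModel to) {
        to.name = from.name;
        to.fieldNames = from.fieldNames;
    }
}

Keeping the copy step generic is what lets the real method delegate field typing to ParserHelper.deriveDataTypes for both the HIVE and RDBMS branches.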

Example 3 with DefaultHiveSchema

Use of com.thinkbiganalytics.discovery.model.DefaultHiveSchema in project kylo by Teradata.

From the class SparkFileSchemaParserService, method toHiveSchema:

private DefaultHiveSchema toHiveSchema(TransformQueryResult result, SparkFileType fileType) {
    DefaultHiveSchema schema = new DefaultHiveSchema();
    schema.setHiveFormat("STORED AS " + fileType);
    schema.setStructured(true);
    ArrayList<Field> fields = new ArrayList<>();
    List<? extends QueryResultColumn> columns = result.getColumns();
    List<List<Object>> rows = result.getRows();
    for (int i = 0; i < columns.size(); ++i) {
        QueryResultColumn column = columns.get(i);
        DefaultField field = new DefaultField();
        field.setName(column.getDisplayName());
        field.setNativeDataType(column.getDataType());
        field.setDerivedDataType(column.getDataType());
        field.setDataTypeDescriptor(ParserHelper.hiveTypeToDescriptor(column.getDataType()));
        // strip the precisionScale and assign to the field property
        setPrecisionAndScale(field);
        // Add a sample value for this column from every returned row
        for (List<Object> row : rows) {
            Object oVal = row.get(i);
            if (oVal != null) {
                field.getSampleValues().add(oVal.toString());
            }
        }
        fields.add(field);
    }
    schema.setFields(fields);
    return schema;
}
Also used: DefaultField(com.thinkbiganalytics.discovery.model.DefaultField) Field(com.thinkbiganalytics.discovery.schema.Field) DefaultHiveSchema(com.thinkbiganalytics.discovery.model.DefaultHiveSchema) ArrayList(java.util.ArrayList) List(java.util.List) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn)
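setPrecisionAndScale is defined elsewhere in the service; per the inline comment, it strips a precision/scale suffix from types such as decimal(10,2) and assigns it to the field. A minimal, hypothetical sketch of that parsing step (PrecisionScaleDemo and its regex are ours, not the actual Kylo helper):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PrecisionScaleDemo {

    // Matches e.g. "decimal(10,2)": group 1 is the base type, group 2 the precision/scale
    private static final Pattern TYPE_WITH_PRECISION = Pattern.compile("^(\\w+)\\((\\d+(?:,\\d+)?)\\)$");

    public static void main(String[] args) {
        String dataType = "decimal(10,2)";
        Matcher m = TYPE_WITH_PRECISION.matcher(dataType);
        if (m.matches()) {
            String derivedDataType = m.group(1); // "decimal"
            String precisionScale = m.group(2);  // "10,2"
            System.out.println(derivedDataType + " / " + precisionScale);
        }
    }
}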

Aggregations

DefaultHiveSchema (com.thinkbiganalytics.discovery.model.DefaultHiveSchema) 3
Field (com.thinkbiganalytics.discovery.schema.Field) 2
DefaultField (com.thinkbiganalytics.discovery.model.DefaultField) 1
DefaultFileSchema (com.thinkbiganalytics.discovery.model.DefaultFileSchema) 1
DefaultTableSchema (com.thinkbiganalytics.discovery.model.DefaultTableSchema) 1
QueryResultColumn (com.thinkbiganalytics.discovery.schema.QueryResultColumn) 1
Schema (com.thinkbiganalytics.discovery.schema.Schema) 1
PartitionField (com.thinkbiganalytics.feedmgr.rest.model.schema.PartitionField) 1
ArrayList (java.util.ArrayList) 1
HashMap (java.util.HashMap) 1
List (java.util.List) 1