
Example 1 with MapType

Use of org.apache.carbondata.core.metadata.datatype.MapType in project carbondata by apache.

From the class AvroCarbonWriter, method prepareSubFields, which recursively converts a nested Avro schema node into a carbon StructField.

private static StructField prepareSubFields(String fieldName, Schema childSchema) {
    Schema.Type type = childSchema.getType();
    LogicalType logicalType = childSchema.getLogicalType();
    switch(type) {
        case BOOLEAN:
            return new StructField(fieldName, DataTypes.BOOLEAN);
        case INT:
            if (logicalType instanceof LogicalTypes.Date) {
                return new StructField(fieldName, DataTypes.DATE);
            } else {
                // logical types other than date (or none) are mapped to carbon as INT data type
                return new StructField(fieldName, DataTypes.INT);
            }
        case LONG:
            if (logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros) {
                return new StructField(fieldName, DataTypes.TIMESTAMP);
            } else {
                // logical types other than timestamp (or none) are mapped to carbon as LONG data type
                return new StructField(fieldName, DataTypes.LONG);
            }
        case DOUBLE:
            return new StructField(fieldName, DataTypes.DOUBLE);
        case ENUM:
        case STRING:
            return new StructField(fieldName, DataTypes.STRING);
        case FLOAT:
            return new StructField(fieldName, DataTypes.FLOAT);
        case MAP:
            // recursively get the sub fields
            ArrayList<StructField> keyValueFields = new ArrayList<>();
            // for Avro maps, the key dataType is always fixed as String
            StructField keyField = new StructField(fieldName + ".key", DataTypes.STRING);
            StructField valueField = prepareSubFields(fieldName + ".value", childSchema.getValueType());
            if (null != valueField) {
                keyValueFields.add(keyField);
                keyValueFields.add(valueField);
                StructField mapKeyValueField = new StructField(fieldName + ".val", DataTypes.createStructType(keyValueFields), keyValueFields);
                // value dataType will be at position 1 in the fields
                MapType mapType = DataTypes.createMapType(DataTypes.STRING, mapKeyValueField.getDataType());
                List<StructField> mapStructFields = new ArrayList<>();
                mapStructFields.add(mapKeyValueField);
                return new StructField(fieldName, mapType, mapStructFields);
            }
            return null;
        case RECORD:
            // recursively get the sub fields
            ArrayList<StructField> structSubFields = new ArrayList<>();
            for (Schema.Field avroSubField : childSchema.getFields()) {
                StructField structField = prepareSubFields(avroSubField.name(), avroSubField.schema());
                if (structField != null) {
                    structSubFields.add(structField);
                }
            }
            return (new StructField(fieldName, DataTypes.createStructType(structSubFields), structSubFields));
        case ARRAY:
            // recursively get the sub fields
            // array will have only one sub field.
            DataType subType = getMappingDataTypeForCollectionRecord(fieldName, childSchema.getElementType());
            List<StructField> subFields = new ArrayList<>();
            subFields.add(prepareSubFields(childSchema.getName(), childSchema.getElementType()));
            if (subType != null) {
                return (new StructField(fieldName, DataTypes.createArrayType(subType), subFields));
            } else {
                return null;
            }
        case UNION:
            // recursively get the union types
            int i = 0;
            ArrayList<StructField> structSubTypes = new ArrayList<>();
            for (Schema avroSubField : childSchema.getTypes()) {
                StructField structField = prepareSubFields(fieldName + i++, avroSubField);
                if (structField != null) {
                    structSubTypes.add(structField);
                }
            }
            return (new StructField(fieldName, DataTypes.createStructType(structSubTypes), structSubTypes));
        case BYTES:
            // Avro DECIMAL is declared as a BYTES type with the logicalType property
            // set to "decimal" and a specified precision and scale
            if (logicalType instanceof LogicalTypes.Decimal) {
                int precision = ((LogicalTypes.Decimal) childSchema.getLogicalType()).getPrecision();
                int scale = ((LogicalTypes.Decimal) childSchema.getLogicalType()).getScale();
                return new StructField(fieldName, DataTypes.createDecimalType(precision, scale));
            } else {
                throw new UnsupportedOperationException("carbon not support " + type.toString() + " avro type yet");
            }
        case NULL:
            return null;
        default:
            throw new UnsupportedOperationException("carbon not support " + type.toString() + " avro type yet");
    }
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) LogicalType(org.apache.avro.LogicalType) LogicalTypes(org.apache.avro.LogicalTypes) MapType(org.apache.carbondata.core.metadata.datatype.MapType) StructField(org.apache.carbondata.core.metadata.datatype.StructField) BigDecimal(java.math.BigDecimal) DataType(org.apache.carbondata.core.metadata.datatype.DataType)
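
For reference, a minimal sketch (not part of the carbondata sources) of the StructField tree that the MAP branch above builds, assuming a hypothetical nested Avro field named props of type map<string,int>:

// Sketch only: mirrors the MAP branch of prepareSubFields for a hypothetical
// nested field "props" of Avro type map<string,int>
ArrayList<StructField> keyValueFields = new ArrayList<>();
// Avro map keys are always strings, so the key field is fixed to STRING
StructField keyField = new StructField("props.key", DataTypes.STRING);
// the value schema (INT here) would come from the recursive prepareSubFields call
StructField valueField = new StructField("props.value", DataTypes.INT);
keyValueFields.add(keyField);
keyValueFields.add(valueField);
// the key/value pair is wrapped in one struct field named "props.val"
StructField mapKeyValueField =
    new StructField("props.val", DataTypes.createStructType(keyValueFields), keyValueFields);
// resulting carbon type: map<string, struct<key:string, value:int>>
MapType mapType = DataTypes.createMapType(DataTypes.STRING, mapKeyValueField.getDataType());
List<StructField> mapStructFields = new ArrayList<>();
mapStructFields.add(mapKeyValueField);
StructField propsField = new StructField("props", mapType, mapStructFields);

Note that at this level the map value type is the whole key/value struct; prepareFields (Example 3) later unwraps it to recover the real value type.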

Example 2 with MapType

Use of org.apache.carbondata.core.metadata.datatype.MapType in project carbondata by apache.

From the class ORCCarbonWriter, method orcToCarbonSchemaConverter, which converts an ORC TypeDescription into a carbon Field.

// Converts an ORC schema to a carbon schema
private static Field orcToCarbonSchemaConverter(TypeDescription typeDescription, List<String> fieldsName, String colName) {
    Objects.requireNonNull(typeDescription, "orc typeDescription should not be null");
    Objects.requireNonNull(typeDescription.getCategory(), "typeDescription category should not be null");
    if (colName == null) {
        colName = typeDescription.getCategory().getName();
    }
    switch(typeDescription.getCategory()) {
        case BOOLEAN:
            return new Field(colName, "boolean");
        case BYTE:
        case BINARY:
            return new Field(colName, "binary");
        case SHORT:
            return new Field(colName, "short");
        case INT:
            return new Field(colName, "int");
        case LONG:
            return new Field(colName, "long");
        case FLOAT:
            return new Field(colName, "float");
        case DOUBLE:
            return new Field(colName, "double");
        case DECIMAL:
            return new Field(colName, "decimal");
        case STRING:
            return new Field(colName, "string");
        case CHAR:
        case VARCHAR:
            return new Field(colName, "varchar");
        case DATE:
            return new Field(colName, "date");
        case TIMESTAMP:
            return new Field(colName, "timestamp");
        case STRUCT:
            List<TypeDescription> childSchemas = typeDescription.getChildren();
            Field[] childs = new Field[childSchemas.size()];
            childSchema(childs, childSchemas, fieldsName);
            List<StructField> structList = new ArrayList<>();
            for (int i = 0; i < childSchemas.size(); i++) {
                structList.add(new StructField(childs[i].getFieldName(), childs[i].getDataType(), childs[i].getChildren()));
            }
            return new Field(colName, "struct", structList);
        case LIST:
            childSchemas = typeDescription.getChildren();
            childs = new Field[childSchemas.size()];
            childSchema(childs, childSchemas, fieldsName);
            List<StructField> arrayField = new ArrayList<>();
            for (int i = 0; i < childSchemas.size(); i++) {
                arrayField.add(new StructField(childs[i].getFieldName(), childs[i].getDataType(), childs[i].getChildren()));
            }
            return new Field(colName, "array", arrayField);
        case MAP:
            childSchemas = typeDescription.getChildren();
            childs = new Field[childSchemas.size()];
            childSchema(childs, childSchemas, fieldsName);
            ArrayList<StructField> keyValueFields = new ArrayList<>();
            StructField keyField = new StructField(typeDescription.getCategory().getName() + ".key", childs[0].getDataType());
            StructField valueField = new StructField(typeDescription.getCategory().getName() + ".value", childs[1].getDataType(), childs[1].getChildren());
            keyValueFields.add(keyField);
            keyValueFields.add(valueField);
            StructField mapKeyValueField = new StructField(typeDescription.getCategory().getName() + ".val", DataTypes.createStructType(keyValueFields), keyValueFields);
            MapType mapType = DataTypes.createMapType(DataTypes.STRING, mapKeyValueField.getDataType());
            List<StructField> mapStructFields = new ArrayList<>();
            mapStructFields.add(mapKeyValueField);
            return new Field(colName, mapType, mapStructFields);
        default:
            throw new UnsupportedOperationException("carbon not support " + typeDescription.getCategory().getName() + " orc type yet");
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ArrayList(java.util.ArrayList) TypeDescription(org.apache.orc.TypeDescription) MapType(org.apache.carbondata.core.metadata.datatype.MapType)
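
A minimal sketch (not part of the carbondata sources), assuming a hypothetical ORC column props of type map<string,int>: it shows the TypeDescription that reaches the MAP branch above and, in effect, the carbon Field it is converted into.

// Sketch only: the ORC description of a hypothetical map<string,int> column
TypeDescription orcMap =
    TypeDescription.createMap(TypeDescription.createString(), TypeDescription.createInt());
// orcToCarbonSchemaConverter(orcMap, fieldsName, "props") effectively builds:
ArrayList<StructField> keyValueFields = new ArrayList<>();
keyValueFields.add(new StructField("map.key", DataTypes.STRING));
keyValueFields.add(new StructField("map.value", DataTypes.INT));
StructField mapKeyValueField =
    new StructField("map.val", DataTypes.createStructType(keyValueFields), keyValueFields);
// the map value type is the key/value struct; keys are modelled as STRING
MapType mapType = DataTypes.createMapType(DataTypes.STRING, mapKeyValueField.getDataType());
List<StructField> mapStructFields = new ArrayList<>();
mapStructFields.add(mapKeyValueField);
Field mapColumn = new Field("props", mapType, mapStructFields);

The key and value sub-fields are named "map.key" and "map.value" rather than after the column, because the converter prefixes them with typeDescription.getCategory().getName().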

Example 3 with MapType

Use of org.apache.carbondata.core.metadata.datatype.MapType in project carbondata by apache.

From the class AvroCarbonWriter, method prepareFields, which converts a top-level Avro record field into a carbon Field.

private static Field prepareFields(Schema.Field avroField) {
    String fieldName = avroField.name();
    Schema childSchema = avroField.schema();
    Schema.Type type = childSchema.getType();
    LogicalType logicalType = childSchema.getLogicalType();
    switch(type) {
        case BOOLEAN:
            return new Field(fieldName, DataTypes.BOOLEAN);
        case INT:
            if (logicalType instanceof LogicalTypes.Date) {
                return new Field(fieldName, DataTypes.DATE);
            } else {
                // logical types other than date (or none) are mapped to carbon as INT data type
                return new Field(fieldName, DataTypes.INT);
            }
        case LONG:
            if (logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros) {
                return new Field(fieldName, DataTypes.TIMESTAMP);
            } else {
                // logical types other than timestamp (or none) are mapped to carbon as LONG data type
                return new Field(fieldName, DataTypes.LONG);
            }
        case DOUBLE:
            return new Field(fieldName, DataTypes.DOUBLE);
        case ENUM:
        case STRING:
            return new Field(fieldName, DataTypes.STRING);
        case FLOAT:
            return new Field(fieldName, DataTypes.FLOAT);
        case MAP:
            // recursively get the sub fields
            ArrayList<StructField> mapSubFields = new ArrayList<>();
            StructField mapField = prepareSubFields(fieldName, childSchema);
            if (null != mapField) {
                // key value field will be wrapped inside a map struct field
                StructField keyValueField = mapField.getChildren().get(0);
                // value dataType will be at position 1 in the fields
                DataType valueType = ((StructType) keyValueField.getDataType()).getFields().get(1).getDataType();
                MapType mapType = DataTypes.createMapType(DataTypes.STRING, valueType);
                mapSubFields.add(keyValueField);
                return new Field(fieldName, mapType, mapSubFields);
            }
            return null;
        case RECORD:
            // recursively get the sub fields
            ArrayList<StructField> structSubFields = new ArrayList<>();
            for (Schema.Field avroSubField : childSchema.getFields()) {
                StructField structField = prepareSubFields(avroSubField.name(), avroSubField.schema());
                if (structField != null) {
                    structSubFields.add(structField);
                }
            }
            return new Field(fieldName, "struct", structSubFields);
        case ARRAY:
            // recursively get the sub fields
            ArrayList<StructField> arraySubField = new ArrayList<>();
            // array will have only one sub field.
            StructField structField = prepareSubFields(fieldName, childSchema.getElementType());
            if (structField != null) {
                arraySubField.add(structField);
                return new Field(fieldName, "array", arraySubField);
            } else {
                return null;
            }
        case UNION:
            int i = 0;
            // Get union types and store as Struct<type>
            ArrayList<StructField> unionFields = new ArrayList<>();
            for (Schema avroSubField : avroField.schema().getTypes()) {
                if (!avroSubField.getType().equals(Schema.Type.NULL)) {
                    StructField unionField = prepareSubFields(avroField.name() + i++, avroSubField);
                    if (unionField != null) {
                        unionFields.add(unionField);
                    }
                }
            }
            if (unionFields.isEmpty()) {
                throw new UnsupportedOperationException("Carbon do not support Avro UNION with only null type");
            }
            return new Field(fieldName, "struct", unionFields);
        case BYTES:
            // Avro DECIMAL is declared as a BYTES type with the logicalType property
            // set to "decimal" and a specified precision and scale
            if (logicalType instanceof LogicalTypes.Decimal) {
                int precision = ((LogicalTypes.Decimal) childSchema.getLogicalType()).getPrecision();
                int scale = ((LogicalTypes.Decimal) childSchema.getLogicalType()).getScale();
                return new Field(fieldName, DataTypes.createDecimalType(precision, scale));
            } else {
                throw new UnsupportedOperationException("carbon not support " + type.toString() + " avro type yet");
            }
        case NULL:
            return null;
        default:
            throw new UnsupportedOperationException("carbon not support " + type.toString() + " avro type yet");
    }
}
Also used : StructType(org.apache.carbondata.core.metadata.datatype.StructType) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) LogicalType(org.apache.avro.LogicalType) LogicalTypes(org.apache.avro.LogicalTypes) MapType(org.apache.carbondata.core.metadata.datatype.MapType) StructField(org.apache.carbondata.core.metadata.datatype.StructField) Field(org.apache.carbondata.core.metadata.datatype.Field) BigDecimal(java.math.BigDecimal) DataType(org.apache.carbondata.core.metadata.datatype.DataType)
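
Again a minimal sketch (not part of the carbondata sources), assuming a hypothetical Avro record field props declared as map<string,int> and run inside AvroCarbonWriter, where the private prepareSubFields is visible: it shows how the MAP branch above unwraps the struct built in Example 1 and keeps only the value data type for the final MapType.

// Sketch only: Avro schema of a hypothetical map<string,int> field
Schema mapSchema = Schema.createMap(Schema.create(Schema.Type.INT));
StructField mapField = prepareSubFields("props", mapSchema);
// the key/value pair is the single child struct ("props.val")
StructField keyValueField = mapField.getChildren().get(0);
// the value dataType sits at position 1 of that struct: DataTypes.INT here
DataType valueType =
    ((StructType) keyValueField.getDataType()).getFields().get(1).getDataType();
// final carbon schema field: props map<string,int>
MapType mapType = DataTypes.createMapType(DataTypes.STRING, valueType);
ArrayList<StructField> mapSubFields = new ArrayList<>();
mapSubFields.add(keyValueField);
Field propsField = new Field("props", mapType, mapSubFields);

This valueType extraction is why the column ends up exposed as map<string,int> rather than map<string, struct<key,value>>.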

Aggregations

ArrayList (java.util.ArrayList): 3 uses
MapType (org.apache.carbondata.core.metadata.datatype.MapType): 3 uses
StructField (org.apache.carbondata.core.metadata.datatype.StructField): 3 uses
BigDecimal (java.math.BigDecimal): 2 uses
LogicalType (org.apache.avro.LogicalType): 2 uses
LogicalTypes (org.apache.avro.LogicalTypes): 2 uses
Schema (org.apache.avro.Schema): 2 uses
DataType (org.apache.carbondata.core.metadata.datatype.DataType): 2 uses
Field (org.apache.carbondata.core.metadata.datatype.Field): 2 uses
StructType (org.apache.carbondata.core.metadata.datatype.StructType): 1 use
TypeDescription (org.apache.orc.TypeDescription): 1 use