Search in sources :

Example 1 with ArrayObject

use of org.apache.carbondata.processing.loading.complexobjects.ArrayObject in project carbondata by apache.

From the class ArrayDataType, method writeByteArray.

/**
 * Writes an array value as an element count followed by each element's payload.
 *
 * <p>A {@code null} input is emitted as a count of 1 followed by a single {@code null}
 * element, which is handed to the child writer.
 *
 * @param input array value to write, may be {@code null}
 * @param dataOutputStream stream receiving the count and the child payloads
 * @throws IOException if writing to the stream fails
 * @throws DictionaryGenerationException declared by this method; may be raised by the child writer
 */
@Override
public void writeByteArray(ArrayObject input, DataOutputStream dataOutputStream) throws IOException, DictionaryGenerationException {
    if (input == null) {
        // Null array: a single placeholder element that is itself null.
        dataOutputStream.writeInt(1);
        children.writeByteArray(null, dataOutputStream);
        return;
    }
    Object[] elements = input.getData();
    int elementCount = elements.length;
    dataOutputStream.writeInt(elementCount);
    for (int idx = 0; idx < elementCount; idx++) {
        children.writeByteArray(elements[idx], dataOutputStream);
    }
}
Also used : ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject)

Example 2 with ArrayObject

use of org.apache.carbondata.processing.loading.complexobjects.ArrayObject in project carbondata by apache.

From the class AvroCarbonWriter, method avroFieldToObjectForUnionType.

/**
 * Converts a value belonging to a union-typed Avro field, using the schema of one union member.
 *
 * <p>For RECORD, ARRAY and MAP members the value is converted only when it has the matching
 * runtime type ({@code GenericData.Record}; {@code GenericData.Array} or {@code ArrayList};
 * {@code HashMap}); otherwise {@code null} is returned. BYTES is converted only when the member
 * carries a decimal logical type. Every other type is delegated to
 * {@code avroPrimitiveFieldToObject}.
 *
 * @param avroField schema of the union member to convert against
 * @param fieldValue raw value to convert
 * @param avroFields enclosing field; its name, doc and default value are reused when building
 *     the synthetic fields for array elements and map keys/values
 * @return the converted object, or {@code null} when no conversion applies
 */
private Object avroFieldToObjectForUnionType(Schema avroField, Object fieldValue, Schema.Field avroFields) {
    Schema.Type type = avroField.getType();
    LogicalType logicalType = avroField.getLogicalType();
    switch (type) {
        case RECORD: {
            if (!(fieldValue instanceof GenericData.Record)) {
                return null;
            }
            GenericData.Record record = (GenericData.Record) fieldValue;
            List<Schema.Field> recordFields = avroField.getFields();
            int fieldCount = recordFields.size();
            Object[] structChildren = new Object[fieldCount];
            for (int pos = 0; pos < fieldCount; pos++) {
                // Unconverted children stay null, the array default.
                structChildren[pos] = avroFieldToObject(recordFields.get(pos), record.get(pos));
            }
            return new StructObject(structChildren);
        }
        case ARRAY: {
            boolean isSupportedList =
                fieldValue instanceof GenericData.Array || fieldValue instanceof ArrayList;
            if (!isSupportedList) {
                return null;
            }
            // Both accepted runtime types are java.util.List implementations.
            List<?> elements = (List<?>) fieldValue;
            int elementCount = elements.size();
            Object[] arrayChildren = new Object[elementCount];
            for (int pos = 0; pos < elementCount; pos++) {
                Schema.Field elementField = new Schema.Field(avroFields.name(), avroField.getElementType(), avroFields.doc(), avroFields.defaultVal());
                arrayChildren[pos] = avroFieldToObject(elementField, elements.get(pos));
            }
            return new ArrayObject(arrayChildren);
        }
        case MAP: {
            // Map will be internally stored as Array<Struct<Key,Value>>
            if (!(fieldValue instanceof HashMap)) {
                return null;
            }
            Map<?, ?> entries = (Map<?, ?>) fieldValue;
            Object[] keyValueStructs = new Object[entries.size()];
            int next = 0;
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                // Keys are always converted against a STRING schema.
                Schema.Field keyField = new Schema.Field(avroFields.name(), Schema.create(Schema.Type.STRING), avroFields.doc(), avroFields.defaultVal());
                Object convertedKey = avroFieldToObject(keyField, entry.getKey());
                Schema.Field valueField = new Schema.Field(avroFields.name(), avroField.getValueType(), avroFields.doc(), avroFields.defaultVal());
                Object convertedValue = avroFieldToObject(valueField, entry.getValue());
                // Each entry becomes a two-slot struct: [key, value].
                keyValueStructs[next++] = new StructObject(new Object[] {convertedKey, convertedValue});
            }
            return new ArrayObject(keyValueStructs);
        }
        case BYTES: {
            // set to "decimal" and a specified precision and scale
            if (!(logicalType instanceof LogicalTypes.Decimal)) {
                return null;
            }
            LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
            return extractDecimalValue(fieldValue, decimalType.getScale(), decimalType.getPrecision());
        }
        default:
            return avroPrimitiveFieldToObject(type, logicalType, fieldValue);
    }
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) LogicalType(org.apache.avro.LogicalType) GenericData(org.apache.avro.generic.GenericData) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) StructField(org.apache.carbondata.core.metadata.datatype.StructField) Field(org.apache.carbondata.core.metadata.datatype.Field) BigDecimal(java.math.BigDecimal) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) Iterator(java.util.Iterator) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with ArrayObject

use of org.apache.carbondata.processing.loading.complexobjects.ArrayObject in project carbondata by apache.

From the class AvroCarbonWriter, method avroFieldToObject.

/**
 * Converts the value of an Avro field into the object model used for loading, dispatching on
 * the field's schema type.
 *
 * <ul>
 *   <li>MAP is stored as Array&lt;Struct&lt;Key,Value&gt;&gt;; any {@link Map} implementation is
 *       accepted.</li>
 *   <li>RECORD becomes a {@code StructObject} with one slot per schema field.</li>
 *   <li>ARRAY becomes an {@code ArrayObject}; any {@link List} implementation (including
 *       {@code GenericData.Array}) is accepted.</li>
 *   <li>UNION is stored as a struct with one slot per non-null member; only the slot of the
 *       first member that validates against the value is filled.</li>
 *   <li>BYTES is converted only when the schema carries a decimal logical type.</li>
 *   <li>Everything else is delegated to {@code avroPrimitiveFieldToObject}.</li>
 * </ul>
 *
 * @param avroField Avro field describing the value's schema
 * @param fieldValue raw value to convert
 * @return the converted object, or {@code null} when no conversion applies
 * @throws ClassCastException if the value's runtime type does not match a MAP, RECORD or ARRAY
 *     schema
 */
private Object avroFieldToObject(Schema.Field avroField, Object fieldValue) {
    Schema fieldSchema = avroField.schema();
    Schema.Type type = fieldSchema.getType();
    LogicalType logicalType = fieldSchema.getLogicalType();
    Object out = null;
    switch (type) {
        case MAP: {
            // Note: Avro object takes care of removing the duplicates so we should not handle it again
            // Map will be internally stored as Array<Struct<Key,Value>>
            // Cast to the interface so that non-HashMap implementations are accepted as well.
            Map<?, ?> mapEntries = (Map<?, ?>) fieldValue;
            Object[] arrayMapChildObjects = new Object[mapEntries.size()];
            int counter = 0;
            for (Map.Entry<?, ?> mapEntry : mapEntries.entrySet()) {
                // evaluate key; keys are always converted against a STRING schema
                Object keyObject = avroFieldToObject(new Schema.Field(avroField.name(), Schema.create(Schema.Type.STRING), avroField.doc(), avroField.defaultVal()), mapEntry.getKey());
                // evaluate value
                Object valueObject = avroFieldToObject(new Schema.Field(avroField.name(), fieldSchema.getValueType(), avroField.doc(), avroField.defaultVal()), mapEntry.getValue());
                // size is 2 because map will have key and value
                arrayMapChildObjects[counter++] = new StructObject(new Object[] {keyObject, valueObject});
            }
            out = new ArrayObject(arrayMapChildObjects);
            break;
        }
        case RECORD: {
            GenericData.Record record = (GenericData.Record) fieldValue;
            List<Schema.Field> fields = fieldSchema.getFields();
            Object[] structChildObjects = new Object[fields.size()];
            for (int i = 0; i < structChildObjects.length; i++) {
                // Children that convert to null simply leave the slot at its default.
                structChildObjects[i] = avroFieldToObject(fields.get(i), record.get(i));
            }
            out = new StructObject(structChildObjects);
            break;
        }
        case ARRAY: {
            // GenericData.Array and ArrayList are both Lists, so a single code path serves both
            // (and any other List implementation).
            List<?> elements = (List<?>) fieldValue;
            int size = elements.size();
            Object[] arrayChildObjects = new Object[size];
            for (int i = 0; i < size; i++) {
                arrayChildObjects[i] = avroFieldToObject(new Schema.Field(avroField.name(), fieldSchema.getElementType(), avroField.doc(), avroField.defaultVal()), elements.get(i));
            }
            out = new ArrayObject(arrayChildObjects);
            break;
        }
        case UNION: {
            // Union type will be internally stored as Struct<col:type>
            // Fill data object only if fieldvalue is instance of datatype
            // For other field datatypes, fill value as Null
            List<Schema> unionFields = fieldSchema.getTypes();
            int notNullUnionFieldsCount = 0;
            for (Schema unionField : unionFields) {
                if (unionField.getType() != Schema.Type.NULL) {
                    notNullUnionFieldsCount++;
                }
            }
            Object[] values = new Object[notNullUnionFieldsCount];
            // j is the slot index among the non-null members.
            int j = 0;
            for (Schema unionField : unionFields) {
                if (unionField.getType() == Schema.Type.NULL) {
                    continue;
                }
                // The member schema is passed along with its type so that unions of several
                // record, enum or fixed types can be told apart.
                if (validateUnionFieldValue(unionField.getType(), fieldValue, unionField)) {
                    values[j] = avroFieldToObjectForUnionType(unionField, fieldValue, avroField);
                    break;
                }
                j++;
            }
            out = new StructObject(values);
            break;
        }
        case BYTES:
            // set to "decimal" and a specified precision and scale
            if (logicalType instanceof LogicalTypes.Decimal) {
                LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
                out = extractDecimalValue(fieldValue, decimalType.getScale(), decimalType.getPrecision());
            }
            break;
        default:
            out = avroPrimitiveFieldToObject(type, logicalType, fieldValue);
    }
    return out;
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) LogicalType(org.apache.avro.LogicalType) GenericData(org.apache.avro.generic.GenericData) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) StructField(org.apache.carbondata.core.metadata.datatype.StructField) Field(org.apache.carbondata.core.metadata.datatype.Field) BigDecimal(java.math.BigDecimal) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) Iterator(java.util.Iterator) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with ArrayObject

use of org.apache.carbondata.processing.loading.complexobjects.ArrayObject in project carbondata by apache.

From the class JsonRowParser, method jsonToCarbonObject.

/**
 * Converts the JSON node stored for {@code column} into its carbon representation.
 *
 * <ul>
 *   <li>Array columns yield an {@code ArrayObject}; every element is converted against the
 *       column's single child dimension.</li>
 *   <li>Struct columns yield a {@code StructObject} with one slot per child dimension, each
 *       looked up by child column name in the nested JSON map.</li>
 *   <li>Any other column yields the node's {@code toString()} value.</li>
 * </ul>
 *
 * @param jsonNodeMap parsed JSON object containing the column's node
 * @param column column whose value is being extracted
 * @return the converted value, or {@code null} when the node is absent or is an empty array
 */
private Object jsonToCarbonObject(Map<String, Object> jsonNodeMap, CarbonColumn column) {
    DataType type = column.getDataType();
    // Every branch reads the same entry, so look it up once.
    Object node = jsonNodeMap.get(extractChildColumnName(column));
    if (node == null) {
        return null;
    }
    if (DataTypes.isArrayType(type)) {
        ArrayList<?> array = (ArrayList<?>) node;
        if (array.isEmpty()) {
            return null;
        }
        // array column will have only one child, hence get(0).
        // But data can have n elements, hence the loop.
        CarbonDimension childCol = ((CarbonDimension) column).getListOfChildDimensions().get(0);
        // stored as array in carbonObject
        Object[] arrayChildObjects = new Object[array.size()];
        for (int i = 0; i < arrayChildObjects.length; i++) {
            arrayChildObjects[i] = jsonChildElementToCarbonChildElement(array.get(i), childCol);
        }
        return new ArrayObject(arrayChildObjects);
    }
    if (DataTypes.isStructType(type)) {
        CarbonDimension carbonDimension = (CarbonDimension) column;
        @SuppressWarnings("unchecked")
        Map<String, Object> jsonMap = (Map<String, Object>) node;
        int size = carbonDimension.getNumberOfChild();
        Object[] structChildObjects = new Object[size];
        for (int i = 0; i < size; i++) {
            CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(i);
            structChildObjects[i] = jsonChildElementToCarbonChildElement(jsonMap.get(extractChildColumnName(childCol)), childCol);
        }
        return new StructObject(structChildObjects);
    }
    // primitive type
    return node.toString();
}
Also used : ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) ArrayList(java.util.ArrayList) DataType(org.apache.carbondata.core.metadata.datatype.DataType) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) TreeMap(java.util.TreeMap) Map(java.util.Map) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 5 with ArrayObject

use of org.apache.carbondata.processing.loading.complexobjects.ArrayObject in project carbondata by apache.

From the class ArrayDataType, method writeByteArray.

/**
 * Writes an array value as an element count followed by each element's payload.
 *
 * <p>A {@code null} input is emitted as a count of 1 followed by a single {@code null} element.
 * When the array holds exactly one element equal to the empty string and the child type is not
 * a {@code PrimitiveDataType}, the value is passed to
 * {@code CarbonBadRecordUtil.updateEmptyValue} instead and no elements are written.
 *
 * @param input array value (an {@code ArrayObject}), may be {@code null}
 * @param dataOutputStream stream receiving the serialized form
 * @param logHolder bad-record log holder forwarded to the child writer
 * @param isWithoutConverter flag forwarded unchanged to the child writer
 * @param isEmptyBadRecord flag forwarded to the child writer and to the empty-value handling
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeByteArray(Object input, DataOutputStream dataOutputStream, BadRecordLogHolder logHolder, Boolean isWithoutConverter, boolean isEmptyBadRecord) throws IOException {
    if (input == null) {
        // Null array: a single placeholder element that is itself null.
        dataOutputStream.writeInt(1);
        children.writeByteArray(null, dataOutputStream, logHolder, isWithoutConverter, isEmptyBadRecord);
        return;
    }
    Object[] elements = ((ArrayObject) input).getData();
    boolean holdsSingleEmptyString =
        elements.length == 1 && elements[0] != null && elements[0].equals("");
    if (holdsSingleEmptyString && !(children instanceof PrimitiveDataType)) {
        // If child complex column is empty, no need to iterate. Fill empty byte array and return.
        CarbonBadRecordUtil.updateEmptyValue(dataOutputStream, isEmptyBadRecord, logHolder, parentName, DataTypeUtil.valueOf("array"));
        return;
    }
    dataOutputStream.writeInt(elements.length);
    for (int idx = 0; idx < elements.length; idx++) {
        children.writeByteArray(elements[idx], dataOutputStream, logHolder, isWithoutConverter, isEmptyBadRecord);
    }
}
Also used : ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject)

Aggregations

ArrayObject (org.apache.carbondata.processing.loading.complexobjects.ArrayObject)7 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 StructObject (org.apache.carbondata.processing.loading.complexobjects.StructObject)4 HashMap (java.util.HashMap)3 BigDecimal (java.math.BigDecimal)2 Iterator (java.util.Iterator)2 TreeMap (java.util.TreeMap)2 LogicalType (org.apache.avro.LogicalType)2 Schema (org.apache.avro.Schema)2 GenericData (org.apache.avro.generic.GenericData)2 DataType (org.apache.carbondata.core.metadata.datatype.DataType)2 Field (org.apache.carbondata.core.metadata.datatype.Field)2 StructField (org.apache.carbondata.core.metadata.datatype.StructField)2 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)2