Search in sources :

Example 66 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonSchemaReaderTest method setUp.

/**
 * Recreates the test store before each test: deletes the directory at {@code path} and writes
 * ten CSV rows covering twelve fields (primitive, date/time, decimal, varchar, array and float
 * types) through a {@link CarbonWriter}.
 *
 * @throws IOException if deleting the directory, writing a row or closing the writer fails
 * @throws InvalidLoadOptionException if the writer rejects the supplied load options
 */
@Before
public void setUp() throws IOException, InvalidLoadOptionException {
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[12];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("shortField", DataTypes.SHORT);
    fields[2] = new Field("intField", DataTypes.INT);
    fields[3] = new Field("longField", DataTypes.LONG);
    fields[4] = new Field("doubleField", DataTypes.DOUBLE);
    fields[5] = new Field("boolField", DataTypes.BOOLEAN);
    fields[6] = new Field("dateField", DataTypes.DATE);
    fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
    fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
    fields[9] = new Field("varcharField", DataTypes.VARCHAR);
    fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
    fields[11] = new Field("floatField", DataTypes.FLOAT);
    // '#' separates the elements of the array column in the CSV input below.
    Map<String, String> map = new HashMap<>();
    map.put("complex_delimiter_level_1", "#");
    CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonSchemaReaderTest").build();
    try {
        for (int i = 0; i < 10; i++) {
            String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon", "1.23" };
            writer.write(row2);
        }
    } finally {
        // Always release the writer, even when a write fails part-way through.
        writer.close();
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) HashMap(java.util.HashMap) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Before(org.junit.Before)

Example 67 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class AvroCarbonWriter method getCarbonSchemaFromAvroSchema.

/**
 * Builds the carbon SDK schema that corresponds to an avro schema.
 *
 * <p>Every avro field is passed through {@code prepareFields}; the result is stored at the
 * position of the avro field, so a field for which {@code prepareFields} yields {@code null}
 * leaves a {@code null} slot in the carbon field array.
 *
 * @param avroSchema avro schema
 * @return carbon sdk schema
 */
public static org.apache.carbondata.sdk.file.Schema getCarbonSchemaFromAvroSchema(Schema avroSchema) {
    Field[] converted = new Field[avroSchema.getFields().size()];
    int position = 0;
    for (Schema.Field source : avroSchema.getFields()) {
        // Storing a null result is the same as leaving the slot untouched.
        converted[position++] = prepareFields(source);
    }
    return new org.apache.carbondata.sdk.file.Schema(converted);
}
Also used : StructField(org.apache.carbondata.core.metadata.datatype.StructField) Field(org.apache.carbondata.core.metadata.datatype.Field) Schema(org.apache.avro.Schema)

Example 68 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class AvroCarbonWriter method avroFieldToObject.

/**
 * Converts one avro field value into the object form handed to the carbon writer.
 *
 * <p>Complex types are converted recursively: MAP becomes an {@link ArrayObject} of key/value
 * {@link StructObject}s, RECORD becomes a {@link StructObject}, ARRAY becomes an
 * {@link ArrayObject} and UNION becomes a {@link StructObject} with one slot per non-null
 * member type. BYTES is converted only when it carries the decimal logical type; every other
 * type is delegated to {@code avroPrimitiveFieldToObject}.
 *
 * @param avroField avro field describing the schema of {@code fieldValue}
 * @param fieldValue value of that field as read from the avro record
 * @return the converted object; may be {@code null}, for example for BYTES without a decimal
 *         logical type
 */
private Object avroFieldToObject(Schema.Field avroField, Object fieldValue) {
    Object out = null;
    Schema.Type type = avroField.schema().getType();
    LogicalType logicalType = avroField.schema().getLogicalType();
    switch(type) {
        case MAP:
            // Note: Avro object takes care of removing the duplicates so we should not handle it again
            // Map will be internally stored as Array<Struct<Key,Value>>
            // Cast to the Map interface so that any Map implementation is accepted, not only HashMap.
            Map<?, ?> mapEntries = (Map<?, ?>) fieldValue;
            Object[] arrayMapChildObjects = new Object[mapEntries.size()];
            int counter = 0;
            Iterator<?> iterator = mapEntries.entrySet().iterator();
            while (iterator.hasNext()) {
                Map.Entry<?, ?> mapEntry = (Map.Entry<?, ?>) iterator.next();
                // size is 2 because map will have key and value
                Object[] mapChildObjects = new Object[2];
                // evaluate key (avro map keys are always strings)
                mapChildObjects[0] = avroFieldToObject(new Schema.Field(avroField.name(), Schema.create(Schema.Type.STRING), avroField.doc(), avroField.defaultVal()), mapEntry.getKey());
                // evaluate value
                mapChildObjects[1] = avroFieldToObject(new Schema.Field(avroField.name(), avroField.schema().getValueType(), avroField.doc(), avroField.defaultVal()), mapEntry.getValue());
                arrayMapChildObjects[counter++] = new StructObject(mapChildObjects);
            }
            out = new ArrayObject(arrayMapChildObjects);
            break;
        case RECORD:
            List<Schema.Field> fields = avroField.schema().getFields();
            Object[] structChildObjects = new Object[fields.size()];
            for (int i = 0; i < fields.size(); i++) {
                // Child values are read by position, matching the order of the schema fields.
                structChildObjects[i] = avroFieldToObject(fields.get(i), ((GenericData.Record) fieldValue).get(i));
            }
            out = new StructObject(structChildObjects);
            break;
        case ARRAY:
            // GenericData.Array and ArrayList are both java.util.List implementations,
            // so a single code path covers them.
            List<?> elements = (List<?>) fieldValue;
            Object[] arrayChildObjects = new Object[elements.size()];
            for (int i = 0; i < arrayChildObjects.length; i++) {
                arrayChildObjects[i] = avroFieldToObject(new Schema.Field(avroField.name(), avroField.schema().getElementType(), avroField.doc(), avroField.defaultVal()), elements.get(i));
            }
            out = new ArrayObject(arrayChildObjects);
            break;
        case UNION:
            // Union type will be internally stored as Struct<col:type>
            // Fill data object only if fieldvalue is instance of datatype
            // For other field datatypes, fill value as Null
            List<Schema> unionFields = avroField.schema().getTypes();
            int notNullUnionFieldsCount = 0;
            for (Schema unionField : unionFields) {
                if (!unionField.getType().equals(Schema.Type.NULL)) {
                    notNullUnionFieldsCount++;
                }
            }
            Object[] values = new Object[notNullUnionFieldsCount];
            // j is the struct slot of the current non-null member type
            int j = 0;
            for (Schema unionField : unionFields) {
                if (unionField.getType().equals(Schema.Type.NULL)) {
                    continue;
                }
                // A union may not contain more than one schema with the same type, except for
                // the named types record, fixed and enum,
                // hence check for schema also in case of union of multiple record or enum or fixed type
                if (validateUnionFieldValue(unionField.getType(), fieldValue, unionField)) {
                    values[j] = avroFieldToObjectForUnionType(unionField, fieldValue, avroField);
                    // only the first matching member type is filled
                    break;
                }
                j++;
            }
            out = new StructObject(values);
            break;
        case BYTES:
            // A decimal is defined in Avro as a bytes type whose logicalType property is
            // set to "decimal" and a specified precision and scale
            if (logicalType instanceof LogicalTypes.Decimal) {
                LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
                out = extractDecimalValue(fieldValue, decimalType.getScale(), decimalType.getPrecision());
            }
            // BYTES without the decimal logical type is left as null here.
            break;
        default:
            out = avroPrimitiveFieldToObject(type, logicalType, fieldValue);
    }
    return out;
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) LogicalType(org.apache.avro.LogicalType) GenericData(org.apache.avro.generic.GenericData) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) StructField(org.apache.carbondata.core.metadata.datatype.StructField) Field(org.apache.carbondata.core.metadata.datatype.Field) BigDecimal(java.math.BigDecimal) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) Iterator(java.util.Iterator) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject) Map(java.util.Map) HashMap(java.util.HashMap)

Example 69 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonIUD method getExpression.

/**
 * Turns a column-to-values mapping into a single filter expression.
 * The values of one column are OR-ed together and the per-column results are AND-ed, so for
 * name --> {karan, kunal, vikram}
 * age --> {24}
 * the resulting expression is:
 * ((name = karan || name = kunal || name = vikram) && (age = 24))
 *
 * @param path location used to look up the carbon index files
 * @param columnToValueMapping column name mapped to the set of values to match
 * @return the combined expression, or null when the mapping has no entries
 * @throws IOException declared for the index file lookup and schema read
 */
private Expression getExpression(String path, Map<String, Set<String>> columnToValueMapping) throws IOException {
    // Column data types are taken from the schema of the first index file.
    Schema schema = CarbonSchemaReader.readSchema(getCarbonIndexFile(path).get(0)).asOriginOrder();
    Field[] fields = schema.getFields();

    List<Expression> perColumnExpressions = new ArrayList<>();
    for (Map.Entry<String, Set<String>> entry : columnToValueMapping.entrySet()) {
        String columnName = entry.getKey();
        DataType columnType = getColumnDataType(fields, columnName);
        // Stays null when the column has no values.
        Expression anyValueMatches = null;
        for (String candidate : entry.getValue()) {
            Expression equality = new EqualToExpression(new ColumnExpression(columnName, columnType), new LiteralExpression(candidate, columnType));
            if (anyValueMatches == null) {
                anyValueMatches = equality;
            } else {
                anyValueMatches = new OrExpression(anyValueMatches, equality);
            }
        }
        perColumnExpressions.add(anyValueMatches);
    }

    if (perColumnExpressions.isEmpty()) {
        return null;
    }
    Expression allColumnsMatch = perColumnExpressions.get(0);
    for (int idx = 1; idx < perColumnExpressions.size(); idx++) {
        allColumnsMatch = new AndExpression(allColumnsMatch, perColumnExpressions.get(idx));
    }
    return allColumnsMatch;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ArrayList(java.util.ArrayList) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) Field(org.apache.carbondata.core.metadata.datatype.Field) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) DataType(org.apache.carbondata.core.metadata.datatype.DataType) HashMap(java.util.HashMap) Map(java.util.Map)

Example 70 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonWriterBuilder method buildCarbonTable.

/**
 * Assembles a non-transactional {@link CarbonTable} from the builder's current settings.
 *
 * <p>When no sort columns were configured, every STRING, DATE and TIMESTAMP field of the
 * schema becomes a sort column and the {@code sortColumns} member is updated accordingly.
 * The table gets a generated temporary name and an empty database name.
 *
 * @return the assembled table
 */
private CarbonTable buildCarbonTable() {
    TableSchemaBuilder tableSchemaBuilder = TableSchema.builder();
    // Size settings are forwarded only when they hold a positive value.
    if (blockSize > 0) {
        tableSchemaBuilder = tableSchemaBuilder.blockSize(blockSize);
    }
    if (blockletSize > 0) {
        tableSchemaBuilder = tableSchemaBuilder.blockletSize(blockletSize);
    }
    if (pageSizeInMb > 0) {
        tableSchemaBuilder = tableSchemaBuilder.pageSizeInMb(pageSizeInMb);
    }
    tableSchemaBuilder.enableLocalDictionary(isLocalDictionaryEnabled);
    tableSchemaBuilder.localDictionaryThreshold(localDictionaryThreshold);

    List<String> sortColumnsList;
    if (sortColumns != null) {
        sortColumnsList = Arrays.asList(sortColumns);
    } else {
        sortColumnsList = new ArrayList<>();
        for (Field field : schema.getFields()) {
            // The field array can hold empty slots, e.g. sized for 4 fields but only 2 supplied.
            if (null == field) {
                continue;
            }
            boolean sortedByDefault = field.getDataType() == DataTypes.STRING || field.getDataType() == DataTypes.DATE || field.getDataType() == DataTypes.TIMESTAMP;
            if (sortedByDefault) {
                sortColumnsList.add(field.getFieldName());
            }
        }
        sortColumns = sortColumnsList.toArray(new String[sortColumnsList.size()]);
    }
    ColumnSchema[] sortColumnsSchemaList = new ColumnSchema[sortColumnsList.size()];

    List<String> invertedIdxColumnsList;
    if (null == invertedIndexColumns) {
        invertedIdxColumnsList = new ArrayList<>();
    } else {
        invertedIdxColumnsList = Arrays.asList(invertedIndexColumns);
    }

    buildTableSchema(schema.getFields(), tableSchemaBuilder, sortColumnsList, sortColumnsSchemaList, invertedIdxColumnsList);
    tableSchemaBuilder.setSortColumns(Arrays.asList(sortColumnsSchemaList));

    String tableName = "_tempTable-" + UUID.randomUUID().toString() + "_" + timestamp;
    // Named tableSchema so the local does not hide the schema member used above.
    TableSchema tableSchema = tableSchemaBuilder.build();
    tableSchema.setTableName(tableName);
    return CarbonTable.builder().tableName(tableSchema.getTableName()).databaseName("").tablePath(path).tableSchema(tableSchema).isTransactionalTable(false).build();
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ArrayList(java.util.ArrayList) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field): 140; File (java.io.File): 111; Test (org.junit.Test): 111; StructField (org.apache.carbondata.core.metadata.datatype.StructField): 104; IOException (java.io.IOException): 55; InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException): 39; ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema): 27; HashMap (java.util.HashMap): 26; ArrayList (java.util.ArrayList): 23; CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 22; ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression): 21; LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression): 21; EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression): 13; Map (java.util.Map): 12; FilenameFilter (java.io.FilenameFilter): 11; FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression): 10; BufferedInputStream (java.io.BufferedInputStream): 9; FileInputStream (java.io.FileInputStream): 9; AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression): 9; FileFilter (java.io.FileFilter): 6