Usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
Below: the setUp method of the CarbonSchemaReaderTest class.
@Before
public void setUp() throws IOException, InvalidLoadOptionException {
  // Start from an empty store directory so earlier runs cannot interfere.
  FileUtils.deleteDirectory(new File(path));

  // One column per data type the schema reader should round-trip, including
  // a decimal, a varchar and a complex array column.
  Field[] schemaFields = new Field[12];
  int col = 0;
  schemaFields[col++] = new Field("stringField", DataTypes.STRING);
  schemaFields[col++] = new Field("shortField", DataTypes.SHORT);
  schemaFields[col++] = new Field("intField", DataTypes.INT);
  schemaFields[col++] = new Field("longField", DataTypes.LONG);
  schemaFields[col++] = new Field("doubleField", DataTypes.DOUBLE);
  schemaFields[col++] = new Field("boolField", DataTypes.BOOLEAN);
  schemaFields[col++] = new Field("dateField", DataTypes.DATE);
  schemaFields[col++] = new Field("timeField", DataTypes.TIMESTAMP);
  schemaFields[col++] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
  schemaFields[col++] = new Field("varcharField", DataTypes.VARCHAR);
  schemaFields[col++] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
  schemaFields[col++] = new Field("floatField", DataTypes.FLOAT);

  // The array column values in the CSV input are '#'-delimited.
  Map<String, String> loadOptions = new HashMap<>();
  loadOptions.put("complex_delimiter_level_1", "#");

  CarbonWriter writer = CarbonWriter.builder()
      .outputPath(path)
      .withLoadOptions(loadOptions)
      .withCsvInput(new Schema(schemaFields))
      .writtenBy("CarbonSchemaReaderTest")
      .build();

  // Write ten rows of deterministic sample data, then flush the segment.
  for (int rowId = 0; rowId < 10; rowId++) {
    writer.write(new String[] {
        "robot" + (rowId % 10),
        String.valueOf(rowId % 10000),
        String.valueOf(rowId),
        String.valueOf(Long.MAX_VALUE - rowId),
        String.valueOf((double) rowId / 2),
        String.valueOf(true),
        "2019-03-02",
        "2019-02-12 03:03:34",
        "12.345",
        "varchar",
        "Hello#World#From#Carbon",
        "1.23" });
  }
  writer.close();
}
Usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
Below: the getCarbonSchemaFromAvroSchema method of the AvroCarbonWriter class.
/**
 * Converts an Avro schema into the carbon SDK schema required by carbonWriter.
 * Avro fields that prepareFields cannot translate leave a null slot at their
 * position in the resulting schema's field array.
 *
 * @param avroSchema avro schema
 * @return carbon sdk schema
 */
public static org.apache.carbondata.sdk.file.Schema getCarbonSchemaFromAvroSchema(Schema avroSchema) {
  List<Schema.Field> avroFields = avroSchema.getFields();
  Field[] carbonFields = new Field[avroFields.size()];
  for (int idx = 0; idx < avroFields.size(); idx++) {
    // The index always advances, so an untranslated field keeps its
    // positional null hole rather than shifting later fields down.
    Field converted = prepareFields(avroFields.get(idx));
    if (converted != null) {
      carbonFields[idx] = converted;
    }
  }
  return new org.apache.carbondata.sdk.file.Schema(carbonFields);
}
Usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
Below: the avroFieldToObject method of the AvroCarbonWriter class.
/**
 * Recursively converts a single Avro field value into the writer's internal
 * object model:
 *  - MAP    -> ArrayObject of StructObject(key, value) pairs
 *  - RECORD -> StructObject of the converted child fields
 *  - ARRAY  -> ArrayObject of the converted elements
 *  - UNION  -> StructObject with one slot per non-null alternative; only the
 *              slot whose alternative matches fieldValue is populated
 *  - BYTES  -> decimal value when the field carries a decimal logical type
 *  - anything else is delegated to avroPrimitiveFieldToObject
 *
 * @param avroField  avro field describing the value's schema
 * @param fieldValue raw avro datum for this field
 * @return converted object, or null when no conversion applies (e.g. BYTES
 *         without a decimal logical type)
 */
private Object avroFieldToObject(Schema.Field avroField, Object fieldValue) {
  Object out = null;
  Schema.Type type = avroField.schema().getType();
  LogicalType logicalType = avroField.schema().getLogicalType();
  switch(type) {
    case MAP:
      // Note: Avro object takes care of removing the duplicates so we should not handle it again
      // Map will be internally stored as Array<Struct<Key,Value>>
      Map mapEntries = (HashMap) fieldValue;
      Object[] arrayMapChildObjects = new Object[mapEntries.size()];
      if (!mapEntries.isEmpty()) {
        Iterator iterator = mapEntries.entrySet().iterator();
        int counter = 0;
        while (iterator.hasNext()) {
          // size is 2 because map will have key and value
          Object[] mapChildObjects = new Object[2];
          Map.Entry mapEntry = (Map.Entry) iterator.next();
          // evaluate key: a synthetic STRING field is used because avro map keys are strings
          Object keyObject = avroFieldToObject(new Schema.Field(avroField.name(), Schema.create(Schema.Type.STRING), avroField.doc(), avroField.defaultVal()), mapEntry.getKey());
          // evaluate value against the map's declared value type
          Object valueObject = avroFieldToObject(new Schema.Field(avroField.name(), avroField.schema().getValueType(), avroField.doc(), avroField.defaultVal()), mapEntry.getValue());
          if (keyObject != null) {
            mapChildObjects[0] = keyObject;
          }
          if (valueObject != null) {
            mapChildObjects[1] = valueObject;
          }
          StructObject keyValueObject = new StructObject(mapChildObjects);
          arrayMapChildObjects[counter++] = keyValueObject;
        }
      }
      out = new ArrayObject(arrayMapChildObjects);
      break;
    case RECORD:
      // Record children are converted positionally; a null conversion leaves
      // a null slot so the struct layout still mirrors the avro record.
      List<Schema.Field> fields = avroField.schema().getFields();
      Object[] structChildObjects = new Object[fields.size()];
      for (int i = 0; i < fields.size(); i++) {
        Object childObject = avroFieldToObject(fields.get(i), ((GenericData.Record) fieldValue).get(i));
        if (childObject != null) {
          structChildObjects[i] = childObject;
        }
      }
      StructObject structObject = new StructObject(structChildObjects);
      out = structObject;
      break;
    case ARRAY:
      // The datum may arrive as either GenericData.Array or a plain ArrayList;
      // both branches convert each element against the declared element type.
      Object[] arrayChildObjects;
      if (fieldValue instanceof GenericData.Array) {
        int size = ((GenericData.Array) fieldValue).size();
        arrayChildObjects = new Object[size];
        for (int i = 0; i < size; i++) {
          Object childObject = avroFieldToObject(new Schema.Field(avroField.name(), avroField.schema().getElementType(), avroField.doc(), avroField.defaultVal()), ((GenericData.Array) fieldValue).get(i));
          if (childObject != null) {
            arrayChildObjects[i] = childObject;
          }
        }
      } else {
        int size = ((ArrayList) fieldValue).size();
        arrayChildObjects = new Object[size];
        for (int i = 0; i < size; i++) {
          Object childObject = avroFieldToObject(new Schema.Field(avroField.name(), avroField.schema().getElementType(), avroField.doc(), avroField.defaultVal()), ((ArrayList) fieldValue).get(i));
          if (childObject != null) {
            arrayChildObjects[i] = childObject;
          }
        }
      }
      out = new ArrayObject(arrayChildObjects);
      break;
    case UNION:
      // Union type will be internally stored as Struct<col:type>
      // Fill data object only if fieldvalue is instance of datatype
      // For other field datatypes, fill value as Null
      List<Schema> unionFields = avroField.schema().getTypes();
      // NULL alternatives are excluded from the struct layout entirely.
      int notNullUnionFieldsCount = 0;
      for (Schema unionField : unionFields) {
        if (!unionField.getType().equals(Schema.Type.NULL)) {
          notNullUnionFieldsCount++;
        }
      }
      Object[] values = new Object[notNullUnionFieldsCount];
      // j tracks the slot of the current non-null alternative; the loop breaks
      // on the first alternative whose type (and schema) matches the value, so
      // at most one slot of 'values' is populated.
      int j = 0;
      for (Schema unionField : unionFields) {
        if (unionField.getType().equals(Schema.Type.NULL)) {
          continue;
        }
        // hence check for schema also in case of union of multiple record or enum or fixed type
        if (validateUnionFieldValue(unionField.getType(), fieldValue, unionField)) {
          values[j] = avroFieldToObjectForUnionType(unionField, fieldValue, avroField);
          break;
        }
        j++;
      }
      out = new StructObject(values);
      break;
    case BYTES:
      // set to "decimal" and a specified precision and scale
      // (non-decimal BYTES fall through and return null)
      if (logicalType instanceof LogicalTypes.Decimal) {
        out = extractDecimalValue(fieldValue, ((LogicalTypes.Decimal) avroField.schema().getLogicalType()).getScale(), ((LogicalTypes.Decimal) avroField.schema().getLogicalType()).getPrecision());
      }
      break;
    default:
      out = avroPrimitiveFieldToObject(type, logicalType, fieldValue);
  }
  return out;
}
Usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
Below: the getExpression method of the CarbonIUD class.
/**
 * Converts the given column-to-values mapping into a filter expression tree.
 * If columnToValueMapping has the following entries:
 * name --> {karan, kunal, vikram}
 * age --> {24}
 * the resulting expression looks like:
 * ((name = karan || name = kunal || name = vikram) && (age = 24))
 *
 * @param path table path used to locate a carbon index file for the schema
 * @param columnToValueMapping column name mapped to the set of accepted values
 * @return combined filter expression, or null when the mapping is empty
 * @throws IOException if the index file or its schema cannot be read
 */
private Expression getExpression(String path, Map<String, Set<String>> columnToValueMapping) throws IOException {
  // Read the schema from the first index file so column data types are known.
  List<String> indexFiles = getCarbonIndexFile(path);
  Schema schema = CarbonSchemaReader.readSchema(indexFiles.get(0)).asOriginOrder();
  Field[] fields = schema.getFields();
  List<Expression> columnExpressions = new ArrayList<>();
  for (Map.Entry<String, Set<String>> column : columnToValueMapping.entrySet()) {
    DataType dataType = getColumnDataType(fields, column.getKey());
    // OR together all accepted values for this column. Folding from null
    // (instead of seeding with get(0)) keeps an empty value set from ever
    // producing an OrExpression with a null child.
    Expression orExpression = null;
    for (String value : column.getValue()) {
      Expression equals = new EqualToExpression(
          new ColumnExpression(column.getKey(), dataType),
          new LiteralExpression(value, dataType));
      orExpression = (orExpression == null) ? equals : new OrExpression(orExpression, equals);
    }
    columnExpressions.add(orExpression);
  }
  // AND together the per-column expressions, skipping null entries so an
  // empty value set cannot inject a null child into the AND tree.
  Expression filterExpression = null;
  for (Expression columnExpression : columnExpressions) {
    if (columnExpression == null) {
      continue;
    }
    filterExpression = (filterExpression == null)
        ? columnExpression
        : new AndExpression(filterExpression, columnExpression);
  }
  return filterExpression;
}
Usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
Below: the buildCarbonTable method of the CarbonWriterBuilder class.
/**
 * Build a {@link CarbonTable} from the builder's SDK schema and tuning
 * options (block/blocklet/page size, local dictionary, sort columns).
 *
 * @return non-transactional carbon table backed by {@code path}, with a
 *         unique temporary table name
 */
private CarbonTable buildCarbonTable() {
  TableSchemaBuilder tableSchemaBuilder = TableSchema.builder();
  // Only override sizes the caller explicitly configured (values <= 0 keep defaults).
  if (blockSize > 0) {
    tableSchemaBuilder = tableSchemaBuilder.blockSize(blockSize);
  }
  if (blockletSize > 0) {
    tableSchemaBuilder = tableSchemaBuilder.blockletSize(blockletSize);
  }
  if (pageSizeInMb > 0) {
    tableSchemaBuilder = tableSchemaBuilder.pageSizeInMb(pageSizeInMb);
  }
  tableSchemaBuilder.enableLocalDictionary(isLocalDictionaryEnabled);
  tableSchemaBuilder.localDictionaryThreshold(localDictionaryThreshold);
  List<String> sortColumnsList = new ArrayList<>();
  if (sortColumns == null) {
    // Default sort columns: every non-null string/date/timestamp field.
    // Null entries are possible because the user may pass a Field[] of
    // size 4 with only 2 fields filled in.
    for (Field field : schema.getFields()) {
      if (null != field) {
        if (field.getDataType() == DataTypes.STRING
            || field.getDataType() == DataTypes.DATE
            || field.getDataType() == DataTypes.TIMESTAMP) {
          sortColumnsList.add(field.getFieldName());
        }
      }
    }
    sortColumns = sortColumnsList.toArray(new String[0]);
  } else {
    sortColumnsList = Arrays.asList(sortColumns);
  }
  ColumnSchema[] sortColumnsSchemaList = new ColumnSchema[sortColumnsList.size()];
  List<String> invertedIdxColumnsList = new ArrayList<>();
  if (null != invertedIndexColumns) {
    invertedIdxColumnsList = Arrays.asList(invertedIndexColumns);
  }
  Field[] fields = schema.getFields();
  // Populates sortColumnsSchemaList as a side effect.
  buildTableSchema(fields, tableSchemaBuilder, sortColumnsList, sortColumnsSchemaList,
      invertedIdxColumnsList);
  tableSchemaBuilder.setSortColumns(Arrays.asList(sortColumnsSchemaList));
  String dbName = "";
  // Unique, throwaway table name: SDK tables are non-transactional.
  String tableName = "_tempTable-" + UUID.randomUUID() + "_" + timestamp;
  // Named 'tableSchema' (not 'schema') so it does not shadow the builder's
  // SDK schema field that is read above.
  TableSchema tableSchema = tableSchemaBuilder.build();
  tableSchema.setTableName(tableName);
  return CarbonTable.builder()
      .tableName(tableSchema.getTableName())
      .databaseName(dbName)
      .tablePath(path)
      .tableSchema(tableSchema)
      .isTransactionalTable(false)
      .build();
}
Aggregations