Search in sources:

Example 71 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

The class ORCCarbonWriter, method orcToCarbonSchemaConverter.

// Converts an ORC TypeDescription into the equivalent carbon schema Field.
// Nested STRUCT/LIST/MAP types are converted recursively through childSchema().
private static Field orcToCarbonSchemaConverter(TypeDescription typeDescription, List<String> fieldsName, String colName) {
    Objects.requireNonNull(typeDescription, "orc typeDescription should not be null");
    Objects.requireNonNull(typeDescription.getCategory(), "typeDescription category should not be null");
    // Fall back to the ORC category name when no explicit column name was supplied.
    String name = (colName == null) ? typeDescription.getCategory().getName() : colName;
    switch (typeDescription.getCategory()) {
        case BOOLEAN:
            return new Field(name, "boolean");
        case BYTE:
        case BINARY:
            // NOTE(review): BYTE shares the BINARY mapping here — confirm this is intended.
            return new Field(name, "binary");
        case SHORT:
            return new Field(name, "short");
        case INT:
            return new Field(name, "int");
        case LONG:
            return new Field(name, "long");
        case FLOAT:
            return new Field(name, "float");
        case DOUBLE:
            return new Field(name, "double");
        case DECIMAL:
            return new Field(name, "decimal");
        case STRING:
            return new Field(name, "string");
        case CHAR:
        case VARCHAR:
            // Fixed-length CHAR is widened to varchar.
            return new Field(name, "varchar");
        case DATE:
            return new Field(name, "date");
        case TIMESTAMP:
            return new Field(name, "timestamp");
        case STRUCT: {
            return new Field(name, "struct", orcChildrenToStructFields(typeDescription, fieldsName));
        }
        case LIST: {
            return new Field(name, "array", orcChildrenToStructFields(typeDescription, fieldsName));
        }
        case MAP: {
            List<TypeDescription> children = typeDescription.getChildren();
            Field[] converted = new Field[children.size()];
            childSchema(converted, children, fieldsName);
            String categoryName = typeDescription.getCategory().getName();
            // The key carries no nested children; the value may be an arbitrary nested type.
            StructField keyField = new StructField(categoryName + ".key", converted[0].getDataType());
            StructField valueField = new StructField(categoryName + ".value", converted[1].getDataType(), converted[1].getChildren());
            ArrayList<StructField> keyValueFields = new ArrayList<>();
            keyValueFields.add(keyField);
            keyValueFields.add(valueField);
            // Wrap the key/value pair in a single synthetic struct child for the map field.
            StructField mapKeyValueField = new StructField(categoryName + ".val", DataTypes.createStructType(keyValueFields), keyValueFields);
            MapType mapType = DataTypes.createMapType(DataTypes.STRING, mapKeyValueField.getDataType());
            List<StructField> mapStructFields = new ArrayList<>();
            mapStructFields.add(mapKeyValueField);
            return new Field(name, mapType, mapStructFields);
        }
        default:
            throw new UnsupportedOperationException("carbon not support " + typeDescription.getCategory().getName() + " orc type yet");
    }
}

// Converts every ORC child type into a StructField; shared by the STRUCT and LIST cases.
private static List<StructField> orcChildrenToStructFields(TypeDescription typeDescription, List<String> fieldsName) {
    List<TypeDescription> children = typeDescription.getChildren();
    Field[] converted = new Field[children.size()];
    childSchema(converted, children, fieldsName);
    List<StructField> structFields = new ArrayList<>();
    for (Field child : converted) {
        structFields.add(new StructField(child.getFieldName(), child.getDataType(), child.getChildren()));
    }
    return structFields;
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ArrayList(java.util.ArrayList) TypeDescription(org.apache.orc.TypeDescription) MapType(org.apache.carbondata.core.metadata.datatype.MapType)

Example 72 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

The class Schema, method parseJson.

/**
 * Create a Schema using a JSON string, for example:
 * [
 *   {"name":"string"},
 *   {"age":"int"}
 * ]
 * Each JSON object holds exactly one entry: the field name mapped to its type name.
 *
 * @param json schema specified as a JSON string
 * @return the parsed Schema
 */
public static Schema parseJson(String json) {
    TypeAdapter<Field> fieldAdapter = new TypeAdapter<Field>() {

        @Override
        public void write(JsonWriter out, Field field) {
            // Serialization is never needed here; this adapter only parses.
        }

        @Override
        public Field read(JsonReader in) throws IOException {
            in.beginObject();
            String fieldName = in.nextName();
            String fieldType = in.nextString();
            in.endObject();
            return new Field(fieldName, fieldType);
        }
    };
    Field[] fields = new GsonBuilder().registerTypeAdapter(Field.class, fieldAdapter).create().fromJson(json, Field[].class);
    return new Schema(fields);
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) GsonBuilder(com.google.gson.GsonBuilder) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) JsonReader(com.google.gson.stream.JsonReader) IOException(java.io.IOException) JsonWriter(com.google.gson.stream.JsonWriter)

Example 73 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

The class CarbonSchemaReader, method readSchemaFromFolder.

/**
 * Read schema from carbon file folder path.
 *
 * @param folderPath carbon file folder path
 * @param conf       hadoop configuration support, can set s3a AK,SK,
 *                   end point and other conf with this
 * @return carbon data Schema
 * @throws IOException if the table metadata cannot be read from the folder
 */
private static Schema readSchemaFromFolder(String folderPath, Configuration conf) throws IOException {
    // A random table name avoids clashes when several folders are read concurrently.
    String tableName = "UnknownTable" + UUID.randomUUID();
    CarbonTable table = CarbonTable.buildTable(folderPath, tableName, conf);
    List<ColumnSchema> columnSchemaList = table.getTableInfo().getFactTable().getListOfColumns();
    // Count only the columns that will actually be materialized as fields below.
    // Columns whose names contain '.' are flattened children of complex columns, and
    // map columns are skipped (see TODO below) — counting either would leave null
    // slots at the end of the fields array handed to Schema.
    int numOfChildren = 0;
    for (ColumnSchema columnSchema : columnSchemaList) {
        if (!(columnSchema.getColumnName().contains(CarbonCommonConstants.POINT))
            && !DataTypes.isMapType(columnSchema.getDataType())) {
            numOfChildren++;
        }
    }
    Field[] fields = new Field[numOfChildren];
    int indexOfFields = 0;
    for (ColumnSchema columnSchema : columnSchemaList) {
        if (!columnSchema.getColumnName().contains(CarbonCommonConstants.POINT)) {
            if (DataTypes.isStructType(columnSchema.getDataType())) {
                // Rebuild the nested struct field from its flattened children.
                StructField structField = getStructChildren(table, columnSchema.getColumnName());
                List<StructField> list = new ArrayList<>();
                list.add(structField);
                fields[indexOfFields] = new Field(columnSchema.getColumnName(), DataTypes.createStructType(list));
                fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
                indexOfFields++;
            } else if (DataTypes.isArrayType(columnSchema.getDataType())) {
                // Rebuild the nested array field from its flattened children.
                StructField structField = getArrayChildren(table, columnSchema.getColumnName());
                List<StructField> list = new ArrayList<>();
                list.add(structField);
                fields[indexOfFields] = new Field(columnSchema.getColumnName(), "array", list);
                fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
                indexOfFields++;
            } else if (DataTypes.isMapType(columnSchema.getDataType())) {
                // TODO: map columns are not supported yet. They are excluded from the
                // count above, so skipping them here leaves no null entries in fields.
            } else {
                fields[indexOfFields] = new Field(columnSchema);
                fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
                indexOfFields++;
            }
        }
    }
    return new Schema(fields);
}
Also used : CarbonUtil.thriftColumnSchemaToWrapperColumnSchema(org.apache.carbondata.core.util.CarbonUtil.thriftColumnSchemaToWrapperColumnSchema) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ArrayList(java.util.ArrayList) CarbonUtil.thriftColumnSchemaToWrapperColumnSchema(org.apache.carbondata.core.util.CarbonUtil.thriftColumnSchemaToWrapperColumnSchema) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ArrayList(java.util.ArrayList) List(java.util.List)

Example 74 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

The class SDKS3Example, method main.

/**
 * Example that writes a two-column (name, age) carbon table to S3 in several
 * batches and reads it back, once with an equal-to filter and once without.
 *
 * Required args: access-key, secret-key, s3-endpoint.
 * Optional args: table path on S3, rows per write, number of writes.
 */
public static void main(String[] args) throws Exception {
    Logger logger = LogServiceFactory.getLogService(SDKS3Example.class.getName());
    if (args == null || args.length < 3) {
        logger.error("Usage: java CarbonS3Example: <access-key> <secret-key> " + "<s3-endpoint> [table-path-on-s3] [rows] [Number of writes]");
        // A usage error should signal failure to the caller, not success.
        System.exit(1);
    }
    // Remember the current property value so it can be restored when this example ends.
    String backupProperty = CarbonProperties.getInstance().getProperty(CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH, CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH_DEFAULT);
    CarbonProperties.getInstance().addProperty(CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH, "true");
    try {
        String path = "s3a://sdk/WriterOutput";
        if (args.length > 3) {
            path = args[3];
        }
        int rows = 3;
        if (args.length > 4) {
            rows = Integer.parseInt(args[4]);
        }
        int num = 3;
        if (args.length > 5) {
            num = Integer.parseInt(args[5]);
        }
        Configuration conf = new Configuration(true);
        conf.set(Constants.ACCESS_KEY, args[0]);
        conf.set(Constants.SECRET_KEY, args[1]);
        conf.set(Constants.ENDPOINT, args[2]);
        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);
        // Write the same schema `num` times, `rows` rows per write.
        for (int j = 0; j < num; j++) {
            CarbonWriter writer = CarbonWriter.builder().outputPath(path).withHadoopConf(conf).withCsvInput(new Schema(fields)).writtenBy("SDKS3Example").build();
            for (int i = 0; i < rows; i++) {
                writer.write(new String[] { "robot" + (i % 10), String.valueOf(i) });
            }
            writer.close();
        }
        // Read data with an equal-to filter on the "name" column.
        EqualToExpression equalToExpression = new EqualToExpression(new ColumnExpression("name", DataTypes.STRING), new LiteralExpression("robot1", DataTypes.STRING));
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age" }).filter(equalToExpression).withHadoopConf(conf).build();
        System.out.println("\nData:");
        int i = 0;
        while (i < 20 && reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            System.out.println(row[0] + " " + row[1]);
            i++;
        }
        System.out.println("\nFinished");
        reader.close();
        // Read without a filter; S3 credentials passed as individual conf entries.
        CarbonReader reader2 = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age" }).withHadoopConf(ACCESS_KEY, args[0]).withHadoopConf(SECRET_KEY, args[1]).withHadoopConf(ENDPOINT, args[2]).build();
        System.out.println("\nData:");
        i = 0;
        while (i < 20 && reader2.hasNext()) {
            Object[] row = (Object[]) reader2.readNextRow();
            System.out.println(row[0] + " " + row[1]);
            i++;
        }
        System.out.println("\nFinished");
        reader2.close();
    } finally {
        // Restore the original property value even if writing/reading fails midway.
        CarbonProperties.getInstance().addProperty(CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH, backupProperty);
    }
}
Also used : EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) Configuration(org.apache.hadoop.conf.Configuration) Schema(org.apache.carbondata.sdk.file.Schema) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) CarbonWriter(org.apache.carbondata.sdk.file.CarbonWriter) Logger(org.apache.log4j.Logger) Field(org.apache.carbondata.core.metadata.datatype.Field) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) CarbonReader(org.apache.carbondata.sdk.file.CarbonReader)

Example 75 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

The class SDKS3SchemaReadExample, method main.

/**
 * Example that reads a carbon table schema from S3 using both
 * CarbonSchemaReader.readSchema overloads and prints every field.
 *
 * Required args: access-key, secret-key, s3-endpoint. Optional: table path on S3.
 */
public static void main(String[] args) throws Exception {
    Logger logger = LogServiceFactory.getLogService(SDKS3SchemaReadExample.class.getName());
    if (args == null || args.length < 3) {
        logger.error("Usage: java CarbonS3Example: <access-key> <secret-key> " + "<s3-endpoint> [table-path-on-s3]");
        // A usage error should signal failure to the caller, not success.
        System.exit(1);
    }
    String path = "s3a://sdk/WriterOutput/carbondata2/";
    if (args.length > 3) {
        path = args[3];
    }
    Configuration configuration = new Configuration();
    configuration.set(ACCESS_KEY, args[0]);
    configuration.set(SECRET_KEY, args[1]);
    configuration.set(ENDPOINT, args[2]);
    // method 1 to read schema (explicit validate-schema flag)
    Schema schema = CarbonSchemaReader.readSchema(path, true, configuration);
    System.out.println("Schema length is " + schema.getFieldsLength());
    for (Field field : schema.getFields()) {
        System.out.println(field + "\t");
    }
    // method 2 to read schema
    Schema schema2 = CarbonSchemaReader.readSchema(path, configuration);
    System.out.println("Schema length is " + schema2.getFieldsLength());
    for (Field field : schema2.getFields()) {
        System.out.println(field + "\t");
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) Configuration(org.apache.hadoop.conf.Configuration) Schema(org.apache.carbondata.sdk.file.Schema) Logger(org.apache.log4j.Logger)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6