Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class ORCCarbonWriter, method orcToCarbonSchemaConverter.
/**
 * Recursively converts an ORC {@code TypeDescription} into a CarbonData {@code Field}.
 *
 * Primitive ORC categories map to carbon type-name strings; STRUCT/LIST/MAP recurse
 * through childSchema() and are rebuilt as complex carbon fields.
 *
 * @param typeDescription ORC type to convert; must not be null and must have a category
 * @param fieldsName      field-name list forwarded to childSchema() for complex children
 * @param colName         column name to use; when null, the ORC category name (e.g. "struct")
 *                        is used as a fallback
 * @return the equivalent carbon Field
 * @throws UnsupportedOperationException for ORC categories with no carbon mapping
 */
private static Field orcToCarbonSchemaConverter(TypeDescription typeDescription, List<String> fieldsName, String colName) {
Objects.requireNonNull(typeDescription, "orc typeDescription should not be null");
Objects.requireNonNull(typeDescription.getCategory(), "typeDescription category should not be null");
if (colName == null) {
// Fallback name is the category name, not a real column name.
colName = typeDescription.getCategory().getName();
}
switch(typeDescription.getCategory()) {
case BOOLEAN:
return new Field(colName, "boolean");
case BYTE:
// NOTE(review): BYTE (ORC tinyint) falls through to the "binary" mapping below —
// confirm this is intended rather than a "byte"/"smallint" mapping.
case BINARY:
return new Field(colName, "binary");
case SHORT:
return new Field(colName, "short");
case INT:
return new Field(colName, "int");
case LONG:
return new Field(colName, "long");
case FLOAT:
return new Field(colName, "float");
case DOUBLE:
return new Field(colName, "double");
case DECIMAL:
return new Field(colName, "decimal");
case STRING:
return new Field(colName, "string");
case CHAR:
// CHAR is widened to varchar; fixed length is not preserved.
case VARCHAR:
return new Field(colName, "varchar");
case DATE:
return new Field(colName, "date");
case TIMESTAMP:
return new Field(colName, "timestamp");
case STRUCT:
// Convert each child type, then wrap the children as StructFields.
List<TypeDescription> childSchemas = typeDescription.getChildren();
Field[] childs = new Field[childSchemas.size()];
childSchema(childs, childSchemas, fieldsName);
List<StructField> structList = new ArrayList<>();
for (int i = 0; i < childSchemas.size(); i++) {
structList.add(new StructField(childs[i].getFieldName(), childs[i].getDataType(), childs[i].getChildren()));
}
return new Field(colName, "struct", structList);
case LIST:
// Same child conversion as STRUCT, but tagged as a carbon "array".
childSchemas = typeDescription.getChildren();
childs = new Field[childSchemas.size()];
childSchema(childs, childSchemas, fieldsName);
List<StructField> arrayField = new ArrayList<>();
for (int i = 0; i < childSchemas.size(); i++) {
arrayField.add(new StructField(childs[i].getFieldName(), childs[i].getDataType(), childs[i].getChildren()));
}
return new Field(colName, "array", arrayField);
case MAP:
// ORC maps have exactly two children: childs[0] = key type, childs[1] = value type.
childSchemas = typeDescription.getChildren();
childs = new Field[childSchemas.size()];
childSchema(childs, childSchemas, fieldsName);
ArrayList<StructField> keyValueFields = new ArrayList<>();
StructField keyField = new StructField(typeDescription.getCategory().getName() + ".key", childs[0].getDataType());
StructField valueField = new StructField(typeDescription.getCategory().getName() + ".value", childs[1].getDataType(), childs[1].getChildren());
keyValueFields.add(keyField);
keyValueFields.add(valueField);
// The map is modeled as a struct of (key, value) pairs.
StructField mapKeyValueField = new StructField(typeDescription.getCategory().getName() + ".val", DataTypes.createStructType(keyValueFields), keyValueFields);
// NOTE(review): the map key type is hard-coded to STRING here even though
// childs[0].getDataType() was read above — confirm non-string ORC map keys are intended
// to be coerced to string.
MapType mapType = DataTypes.createMapType(DataTypes.STRING, mapKeyValueField.getDataType());
List<StructField> mapStructFields = new ArrayList<>();
mapStructFields.add(mapKeyValueField);
return new Field(colName, mapType, mapStructFields);
default:
// Remaining categories (e.g. UNION) have no carbon equivalent.
throw new UnsupportedOperationException("carbon not support " + typeDescription.getCategory().getName() + " orc type yet");
}
}
Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class Schema, method parseJson.
/**
 * Create a Schema from a JSON string, for example:
 * [
 *   {"name":"string"},
 *   {"age":"int"}
 * ]
 * Each object is expected to hold exactly one entry: field name mapped to its type name.
 *
 * @param json schema specified as a JSON string
 * @return Schema built from the parsed fields
 */
public static Schema parseJson(String json) {
  // Custom adapter: each JSON object is read as a single name->type pair.
  TypeAdapter<Field> fieldAdapter = new TypeAdapter<Field>() {
    @Override
    public void write(JsonWriter out, Field field) {
      // Serialization is never used for parsing; intentionally a no-op.
    }

    @Override
    public Field read(JsonReader in) throws IOException {
      in.beginObject();
      String fieldName = in.nextName();
      String fieldType = in.nextString();
      in.endObject();
      return new Field(fieldName, fieldType);
    }
  };
  Field[] parsedFields = new GsonBuilder()
      .registerTypeAdapter(Field.class, fieldAdapter)
      .create()
      .fromJson(json, Field[].class);
  return new Schema(parsedFields);
}
Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class CarbonSchemaReader, method readSchemaFromFolder.
/**
 * Read schema from carbon file folder path
 *
 * The table is built with a random placeholder name; only top-level columns (names
 * without a '.') become schema fields, with complex children resolved via
 * getStructChildren()/getArrayChildren().
 *
 * @param folderPath carbon file folder path
 * @param conf hadoop configuration support, can set s3a AK,SK,
 * end point and other conf with this
 * @return carbon data Schema
 * @throws IOException
 */
private static Schema readSchemaFromFolder(String folderPath, Configuration conf) throws IOException {
// Random suffix avoids collisions since the real table name is unknown here.
String tableName = "UnknownTable" + UUID.randomUUID();
CarbonTable table = CarbonTable.buildTable(folderPath, tableName, conf);
List<ColumnSchema> columnSchemaList = table.getTableInfo().getFactTable().getListOfColumns();
// First pass: count top-level columns (child columns contain '.' in their names).
int numOfChildren = 0;
for (ColumnSchema columnSchema : columnSchemaList) {
if (!(columnSchema.getColumnName().contains(CarbonCommonConstants.POINT))) {
numOfChildren++;
}
}
Field[] fields = new Field[numOfChildren];
int indexOfFields = 0;
// Second pass: build a Field per top-level column.
for (ColumnSchema columnSchema : columnSchemaList) {
if (!columnSchema.getColumnName().contains(CarbonCommonConstants.POINT)) {
if (DataTypes.isStructType(columnSchema.getDataType())) {
StructField structField = getStructChildren(table, columnSchema.getColumnName());
List<StructField> list = new ArrayList<>();
list.add(structField);
fields[indexOfFields] = new Field(columnSchema.getColumnName(), DataTypes.createStructType(list));
fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
indexOfFields++;
} else if (DataTypes.isArrayType(columnSchema.getDataType())) {
StructField structField = getArrayChildren(table, columnSchema.getColumnName());
List<StructField> list = new ArrayList<>();
list.add(structField);
fields[indexOfFields] = new Field(columnSchema.getColumnName(), "array", list);
fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
indexOfFields++;
// NOTE(review): map columns are counted in numOfChildren above but skipped here,
// so a table with a top-level map column leaves trailing null entries in `fields`
// and the resulting Schema — verify callers tolerate this until the TODO is done.
} else if (DataTypes.isMapType(columnSchema.getDataType())) {
// TODO
} else {
// Primitive column: wrap the ColumnSchema directly.
fields[indexOfFields] = new Field(columnSchema);
fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
indexOfFields++;
}
}
}
return new Schema(fields);
}
Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class SDKS3Example, method main.
/**
 * Example: write carbon data to S3 with the SDK, then read it back with and
 * without a filter.
 *
 * Usage: java CarbonS3Example: &lt;access-key&gt; &lt;secret-key&gt; &lt;s3-endpoint&gt;
 *        [table-path-on-s3] [rows] [number-of-writes]
 */
public static void main(String[] args) throws Exception {
  Logger logger = LogServiceFactory.getLogService(SDKS3Example.class.getName());
  if (args == null || args.length < 3) {
    // FIX: the usage string previously ran "<secret-key>" and "<s3-endpoint>" together;
    // FIX: exit with a non-zero status on a usage error (was System.exit(0)).
    logger.error("Usage: java CarbonS3Example: <access-key> <secret-key> "
        + "<s3-endpoint> [table-path-on-s3] [rows] [Number of writes]");
    System.exit(1);
  }
  // Remember the current value so it can be restored after the example runs.
  String backupProperty = CarbonProperties.getInstance().getProperty(
      CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH,
      CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH_DEFAULT);
  CarbonProperties.getInstance().addProperty(
      CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH, "true");
  try {
    // Optional positional arguments with defaults.
    String path = "s3a://sdk/WriterOutput";
    if (args.length > 3) {
      path = args[3];
    }
    int rows = 3;
    if (args.length > 4) {
      rows = Integer.parseInt(args[4]);
    }
    int num = 3;
    if (args.length > 5) {
      num = Integer.parseInt(args[5]);
    }
    // S3 credentials/endpoint for both the writer and reader paths.
    Configuration conf = new Configuration(true);
    conf.set(Constants.ACCESS_KEY, args[0]);
    conf.set(Constants.SECRET_KEY, args[1]);
    conf.set(Constants.ENDPOINT, args[2]);
    // Two-column schema: name (string), age (int).
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    // Perform `num` separate writes of `rows` rows each.
    for (int j = 0; j < num; j++) {
      CarbonWriter writer = CarbonWriter.builder()
          .outputPath(path)
          .withHadoopConf(conf)
          .withCsvInput(new Schema(fields))
          .writtenBy("SDKS3Example")
          .build();
      for (int i = 0; i < rows; i++) {
        writer.write(new String[] { "robot" + (i % 10), String.valueOf(i) });
      }
      writer.close();
    }
    // Read data back with a filter: name == "robot1".
    EqualToExpression equalToExpression = new EqualToExpression(
        new ColumnExpression("name", DataTypes.STRING),
        new LiteralExpression("robot1", DataTypes.STRING));
    CarbonReader reader = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age" })
        .filter(equalToExpression)
        .withHadoopConf(conf)
        .build();
    System.out.println("\nData:");
    int i = 0;
    // Cap output at 20 rows so the example terminates quickly.
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      System.out.println(row[0] + " " + row[1]);
      i++;
    }
    System.out.println("\nFinished");
    reader.close();
    // Read without a filter. CONSISTENCY: reuse the same Configuration as the writer
    // path (previously this builder set the credentials key-by-key via static-imported
    // constants, unlike everywhere else in this method).
    CarbonReader reader2 = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age" })
        .withHadoopConf(conf)
        .build();
    System.out.println("\nData:");
    i = 0;
    while (i < 20 && reader2.hasNext()) {
      Object[] row = (Object[]) reader2.readNextRow();
      System.out.println(row[0] + " " + row[1]);
      i++;
    }
    System.out.println("\nFinished");
    reader2.close();
  } finally {
    // ROBUSTNESS: restore the property even if the example fails part-way
    // (previously it was only restored on the success path).
    CarbonProperties.getInstance().addProperty(
        CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH, backupProperty);
  }
}
Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class SDKS3SchemaReadExample, method main.
/**
 * Example: read a carbon schema from S3 two ways — from the data file
 * (validated) and from the folder path.
 *
 * Usage: java CarbonS3Example: &lt;access-key&gt; &lt;secret-key&gt; &lt;s3-endpoint&gt;
 *        [table-path-on-s3]
 */
public static void main(String[] args) throws Exception {
  Logger logger = LogServiceFactory.getLogService(SDKS3SchemaReadExample.class.getName());
  if (args == null || args.length < 3) {
    // FIX: the usage string previously ran "<secret-key>" and "<s3-endpoint>" together;
    // FIX: exit with a non-zero status on a usage error (was System.exit(0)).
    logger.error("Usage: java CarbonS3Example: <access-key> <secret-key> "
        + "<s3-endpoint> [table-path-on-s3]");
    System.exit(1);
  }
  // Optional table path with a default location.
  String path = "s3a://sdk/WriterOutput/carbondata2/";
  if (args.length > 3) {
    path = args[3];
  }
  // S3 credentials/endpoint for the schema readers.
  Configuration configuration = new Configuration();
  configuration.set(ACCESS_KEY, args[0]);
  configuration.set(SECRET_KEY, args[1]);
  configuration.set(ENDPOINT, args[2]);
  // Method 1: read schema with validation enabled.
  Schema schema = CarbonSchemaReader.readSchema(path, true, configuration);
  System.out.println("Schema length is " + schema.getFieldsLength());
  Field[] fields = schema.getFields();
  for (int i = 0; i < fields.length; i++) {
    System.out.println(fields[i] + "\t");
  }
  // Method 2: read schema without the validation flag.
  Schema schema2 = CarbonSchemaReader.readSchema(path, configuration);
  System.out.println("Schema length is " + schema2.getFieldsLength());
  Field[] fields2 = schema2.getFields();
  for (int i = 0; i < fields2.length; i++) {
    System.out.println(fields2[i] + "\t");
  }
}
Aggregations