Usage of io.cdap.cdap.api.data.schema.Schema in project cdap by caskdata:
method getFields of class SchemaTypeAdapter.
/**
 * Reads the JSON array of field definitions belonging to the RECORD currently being parsed.
 *
 * @param recordName name of the RECORD whose fields are being read; it is pre-registered
 *                   in {@code knownRecords} so that recursive references can resolve
 * @param reader the reader positioned at the JSON array of field objects
 * @param knownRecords record names already encountered during the reading
 * @return the fields parsed from the JSON array
 * @throws IOException when error occurs during reading the json
 */
private List<Schema.Field> getFields(String recordName, JsonReader reader, Map<String, Schema> knownRecords) throws IOException {
  // Register the record name with a placeholder schema before descending into its
  // fields, so a field that refers back to this record is recognized as a record name.
  knownRecords.put(recordName, null);
  List<Schema.Field> parsedFields = new ArrayList<>();
  reader.beginArray();
  // Each array element is an object of the form {"name": ..., "type": ...}.
  while (reader.peek() != JsonToken.END_ARRAY) {
    reader.beginObject();
    String currentName = null;
    Schema currentSchema = null;
    while (reader.hasNext()) {
      String key = reader.nextName();
      switch (key) {
        case NAME:
          currentName = reader.nextString();
          break;
        case TYPE:
          currentSchema = read(reader, knownRecords);
          break;
        default:
          // Unknown keys in the field object are ignored.
          reader.skipValue();
      }
    }
    parsedFields.add(Schema.Field.of(currentName, currentSchema));
    reader.endObject();
  }
  reader.endArray();
  return parsedFields;
}
Usage of io.cdap.cdap.api.data.schema.Schema in project cdap by caskdata:
method readObject of class SchemaTypeAdapter.
/**
 * Read JSON object and return Schema corresponding to it.
 *
 * <p>NOTE(review): the parsing assumes the "type" key appears before the "name" key in the
 * JSON object — the RECORD pre-registration in the NAME case only fires once the type is
 * known. Confirm this ordering is guaranteed by the schema serializer.
 *
 * @param reader JsonReader used to read the json object
 * @param knownRecords Set of record name already encountered during the reading.
 * @return Schema reflecting json
 * @throws IOException when error occurs during reading json
 */
private Schema readObject(JsonReader reader, Map<String, Schema> knownRecords) throws IOException {
  reader.beginObject();
  // Type of the schema
  Schema.Type schemaType = null;
  // Logical Type of the schema
  Schema.LogicalType logicalType = null;
  // Name of the element
  String elementName = null;
  // Store enum values for ENUM type
  List<String> enumValues = new ArrayList<>();
  // Store schema for key and value for MAP type
  Schema keys = null;
  Schema values = null;
  // List of fields for RECORD type
  List<Schema.Field> fields = null;
  // List of items for ARRAY type
  Schema items = null;
  // Precision/scale only apply to the DECIMAL logical type; 0 otherwise.
  int precision = 0;
  int scale = 0;
  // For RECORD type fields will be populated
  while (reader.hasNext()) {
    String name = reader.nextName();
    switch(name) {
      case LOGICAL_TYPE:
        logicalType = Schema.LogicalType.fromToken(reader.nextString());
        break;
      case PRECISION:
        precision = Integer.parseInt(reader.nextString());
        break;
      case SCALE:
        scale = Integer.parseInt(reader.nextString());
        break;
      case TYPE:
        schemaType = Schema.Type.valueOf(reader.nextString().toUpperCase());
        break;
      case NAME:
        elementName = reader.nextString();
        if (schemaType == Schema.Type.RECORD) {
          /*
          Put a null schema in the map for the recursive references.
          For example, if we are looking at the outer 'node' reference in the example below, we
          add the record name in the knownRecords map, so that when we get to the inner 'node'
          reference, we know that its a record type and not a Schema.Type.
          {
            "type": "record",
            "name": "node",
            "fields": [{
              "name": "children",
              "type": [{
                "type": "array",
                "items": ["node", "null"]
              }, "null"]
            },
            {
              "name": "data",
              "type": "int"
            }]
          }
          Full schema corresponding to this RECORD will be put in knownRecords once the fields in the
          RECORD are explored.
          */
          knownRecords.put(elementName, null);
        }
        break;
      case SYMBOLS:
        enumValues = readEnum(reader);
        break;
      case ITEMS:
        items = read(reader, knownRecords);
        break;
      case KEYS:
        keys = read(reader, knownRecords);
        break;
      case VALUES:
        values = read(reader, knownRecords);
        break;
      case FIELDS:
        // NOTE(review): this passes the JSON key ("fields") rather than elementName as the
        // record name to pre-register; the record itself is already registered in the NAME
        // case above. Verify this is intentional and does not pollute knownRecords.
        fields = getFields(name, reader, knownRecords);
        // Replace the placeholder with the fully-built record schema.
        knownRecords.put(elementName, Schema.recordOf(elementName, fields));
        break;
      default:
        reader.skipValue();
        break;
    }
  }
  reader.endObject();
  if (schemaType == null) {
    throw new IllegalStateException("Schema type cannot be null.");
  }
  // A logical type, when present, takes precedence over the physical type.
  if (logicalType != null) {
    if (logicalType == Schema.LogicalType.DECIMAL) {
      try {
        return Schema.decimalOf(precision, scale);
      } catch (IllegalArgumentException e) {
        // Preserve the original exception as the cause so callers can see what was rejected.
        throw new IOException("Decimal type must contain a positive precision value.", e);
      }
    }
    return Schema.of(logicalType);
  }
  Schema schema;
  switch(schemaType) {
    case ARRAY:
      schema = Schema.arrayOf(items);
      break;
    case ENUM:
      schema = Schema.enumWith(enumValues);
      break;
    case MAP:
      schema = Schema.mapOf(keys, values);
      break;
    case RECORD:
      schema = Schema.recordOf(elementName, fields);
      break;
    default:
      schema = Schema.of(schemaType);
      break;
  }
  return schema;
}
Usage of io.cdap.cdap.api.data.schema.Schema in project cdap by caskdata:
method parseMap of class SQLSchemaParser.
/**
 * Parses a map type declaration of the form {@code <keyType,valueType>}; the 'map'
 * keyword itself has already been consumed by the caller.
 *
 * @return the map schema built from the parsed key and value types
 * @throws IOException when the input does not match the expected syntax
 */
private Schema parseMap() throws IOException {
  expectChar('<', "map must be followed by a '<'");
  skipWhitespace();
  Schema mapKeySchema = parseType();
  // A comma (optionally surrounded by whitespace) separates the key and value types.
  advancePastComma("Expected a comma separating map key and value types");
  Schema mapValueSchema = parseType();
  skipWhitespace();
  expectChar('>', "map must end with a '>'");
  return Schema.mapOf(mapKeySchema, mapValueSchema);
}
Usage of io.cdap.cdap.api.data.schema.Schema in project cdap by caskdata:
method parseArray of class SQLSchemaParser.
/**
 * Parses an array type declaration of the form {@code <componentType>}; the 'array'
 * keyword itself has already been consumed by the caller.
 *
 * @return the array schema built from the parsed component type
 * @throws IOException when the input does not match the expected syntax
 */
private Schema parseArray() throws IOException {
  expectChar('<', "array must be followed by a '<'");
  skipWhitespace();
  Schema elementSchema = parseType();
  skipWhitespace();
  expectChar('>', "array must end with a '>'");
  return Schema.arrayOf(elementSchema);
}
Usage of io.cdap.cdap.api.data.schema.Schema in project cdap by caskdata:
method dataTypeToSchema of class DataFrames.
/**
 * Converts a Spark {@link DataType} to a {@link Schema} object.
 *
 * <p>Byte, short, and int all map to {@link Schema.Type#INT}; timestamp and date map to
 * {@link Schema.Type#LONG}. An array of bytes is special-cased to {@link Schema.Type#BYTES}.
 *
 * @param dataType the data type to convert from
 * @param recordCounter tracks number of record schema becoming created; used for record name generation only
 * @return a new {@link Schema}.
 * @throws IllegalArgumentException if the data type has no Schema equivalent (e.g. CalendarInterval)
 */
private static Schema dataTypeToSchema(DataType dataType, int[] recordCounter) {
  if (dataType.equals(DataTypes.NullType)) {
    return Schema.of(Schema.Type.NULL);
  }
  if (dataType.equals(DataTypes.BooleanType)) {
    return Schema.of(Schema.Type.BOOLEAN);
  }
  if (dataType.equals(DataTypes.ByteType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.ShortType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.IntegerType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.LongType)) {
    return Schema.of(Schema.Type.LONG);
  }
  if (dataType.equals(DataTypes.FloatType)) {
    return Schema.of(Schema.Type.FLOAT);
  }
  if (dataType.equals(DataTypes.DoubleType)) {
    return Schema.of(Schema.Type.DOUBLE);
  }
  if (dataType.equals(DataTypes.BinaryType)) {
    return Schema.of(Schema.Type.BYTES);
  }
  if (dataType.equals(DataTypes.StringType)) {
    return Schema.of(Schema.Type.STRING);
  }
  if (dataType instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) dataType;
    // Special case for byte array. Use equals() rather than reference equality for
    // consistency with the other DataType comparisons in this method.
    if (arrayType.elementType().equals(DataTypes.ByteType)) {
      return Schema.of(Schema.Type.BYTES);
    }
    Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter);
    return Schema.arrayOf(arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
  }
  if (dataType instanceof MapType) {
    MapType mapType = (MapType) dataType;
    Schema valueSchema = dataTypeToSchema(mapType.valueType(), recordCounter);
    return Schema.mapOf(dataTypeToSchema(mapType.keyType(), recordCounter), mapType.valueContainsNull() ? Schema.nullableOf(valueSchema) : valueSchema);
  }
  if (dataType instanceof StructType) {
    List<Schema.Field> fields = new ArrayList<>();
    for (StructField structField : ((StructType) dataType).fields()) {
      Schema fieldSchema = dataTypeToSchema(structField.dataType(), recordCounter);
      fields.add(Schema.Field.of(structField.name(), structField.nullable() ? Schema.nullableOf(fieldSchema) : fieldSchema));
    }
    // Generated record names are "Record0", "Record1", ... — unique per conversion.
    return Schema.recordOf("Record" + recordCounter[0]++, fields);
  }
  // Some special types in Spark SQL
  if (dataType.equals(DataTypes.TimestampType)) {
    return Schema.of(Schema.Type.LONG);
  }
  if (dataType.equals(DataTypes.DateType)) {
    return Schema.of(Schema.Type.LONG);
  }
  // Not support the CalendarInterval type for now, as there is no equivalent in Schema
  throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
Aggregations