use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.
the class AvroSchemaConverter method convertField.
/**
 * Converts one Avro schema into the corresponding Parquet {@link Type}.
 *
 * <p>Record, array, map and union schemas return directly from their own branch. Every other
 * supported schema is mapped onto a primitive builder, which is then annotated with the
 * converted Avro logical type (when one is present and convertible) and named
 * {@code fieldName}.
 *
 * @param fieldName name given to the resulting Parquet type
 * @param schema Avro schema to convert
 * @param repetition repetition applied to the resulting Parquet type
 * @param schemaPath path of the field; looked up in {@code pathsToInt96} for fixed schemas and
 *     handed on to nested conversions
 * @return the converted Parquet type
 * @throws IllegalArgumentException if a fixed field listed in {@code pathsToInt96} is not
 *     12 bytes long
 * @throws UnsupportedOperationException if the Avro type is not handled by any branch
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition, String schemaPath) {
  final Schema.Type avroType = schema.getType();
  final LogicalType avroLogicalType = schema.getLogicalType();
  Types.PrimitiveBuilder<PrimitiveType> primitive;

  // Case labels name Schema.Type constants; the identifiers inside the case bodies are the
  // Parquet primitive type names already in scope for this class.
  switch (avroType) {
    case BOOLEAN:
      primitive = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      primitive = Types.primitive(INT32, repetition);
      break;
    case LONG:
      primitive = Types.primitive(INT64, repetition);
      break;
    case FLOAT:
      primitive = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      primitive = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      primitive = Types.primitive(BINARY, repetition);
      break;
    case STRING: {
      // A string carrying the Avro uuid logical type becomes a fixed-length array, but only
      // when the writeParquetUUID flag is set; otherwise it stays a string-annotated binary.
      final boolean asParquetUuid = avroLogicalType != null
          && avroLogicalType.getName().equals(LogicalTypes.uuid().getName())
          && writeParquetUUID;
      if (asParquetUuid) {
        primitive = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
            .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
      } else {
        primitive = Types.primitive(BINARY, repetition).as(stringType());
      }
      break;
    }
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields(), schemaPath));
    case ENUM:
      primitive = Types.primitive(BINARY, repetition).as(enumType());
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(
            repetition, fieldName, convertField("array", schema.getElementType(), REPEATED, schemaPath));
      }
      return ConversionPatterns.listOfElements(
          repetition,
          fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType(), schemaPath));
    case MAP: {
      final Type mapValue = convertField("value", schema.getValueType(), schemaPath);
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(repetition, fieldName, mapValue);
    }
    case FIXED:
      if (!pathsToInt96.contains(schemaPath)) {
        primitive = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
        break;
      }
      if (schema.getFixedSize() != 12) {
        throw new IllegalArgumentException("The size of the fixed type field " + schemaPath + " must be 12 bytes for INT96 conversion");
      }
      primitive = Types.primitive(PrimitiveTypeName.INT96, repetition);
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition, schemaPath);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  // Attach the Parquet annotation for the Avro logical type; this creates an equivalence.
  if (avroLogicalType instanceof LogicalTypes.Decimal) {
    final LogicalTypes.Decimal avroDecimal = (LogicalTypes.Decimal) avroLogicalType;
    primitive = primitive.as(decimalType(avroDecimal.getScale(), avroDecimal.getPrecision()));
  } else if (avroLogicalType != null) {
    final LogicalTypeAnnotation converted = convertLogicalType(avroLogicalType);
    if (converted != null) {
      primitive.as(converted);
    }
  }
  return primitive.named(fieldName);
}
use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.
the class TestJsonRecordFormatter method testNestedGrouping.
/**
 * Formats a record that holds a repeated string field and an optional subgroup (one required
 * int plus a repeated string field) and checks the JSON produced by the formatter.
 */
@Test
public void testNestedGrouping() throws Exception {
  // Schema: repeated binary "flat-string" next to an optional "subgroup" group.
  PrimitiveType flatStringField =
      new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.BINARY, "flat-string");
  PrimitiveType flatIntField =
      new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, "flat-int");
  PrimitiveType stringListField =
      new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.BINARY, "string-list");
  GroupType subgroupField = new GroupType(Type.Repetition.OPTIONAL, "subgroup", flatIntField, stringListField);
  MessageType schema = new MessageType("schema", flatStringField, subgroupField);

  // Nested record: the int first, then the repeated strings in order.
  SimpleRecord nested = new SimpleRecord();
  nested.values.add(kv("flat-int", 12345));
  for (String item : new String[] {"two", "four", "six", "eight", "ten"}) {
    nested.values.add(kv("string-list", item));
  }

  // Top-level record: the repeated strings followed by the nested record.
  SimpleRecord root = new SimpleRecord();
  for (String item : new String[] {"one", "two", "three", "four", "five"}) {
    root.values.add(kv("flat-string", item));
  }
  root.values.add(kv("subgroup", nested));

  String actual = JsonRecordFormatter.fromSchema(schema).formatRecord(root);
  String expected = asJsonString(
      obj(
          entry("flat-string", array("one", "two", "three", "four", "five")),
          entry(
              "subgroup",
              obj(
                  entry("flat-int", 12345),
                  entry("string-list", array("two", "four", "six", "eight", "ten"))))));
  assertEquals(expected, actual);
}
use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.
the class TestJsonRecordFormatter method testGroupList.
/**
 * Formats a record holding three occurrences of a repeated group (one required long plus a
 * repeated double field) and checks the JSON produced by the formatter.
 */
@Test
public void testGroupList() throws Exception {
  PrimitiveType flatIntField =
      new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT64, "flat-int");
  PrimitiveType repeatDoubleField =
      new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.DOUBLE, "repeat-double");
  GroupType repeatGroupField =
      new GroupType(Type.Repetition.REPEATED, "repeat-group", flatIntField, repeatDoubleField);
  MessageType schema = new MessageType("schema", repeatGroupField);

  // One entry per repeat-group occurrence: flatInts[i] pairs with repeatDoubles[i].
  int[] flatInts = {76543, 12345, 10293};
  double[][] repeatDoubles = {
    {1.2345, 5.6789, 10.11121314, 0.4321, 7.6543},
    {1.1, 1.2, 1.3, 1.4, 1.5},
    {9.5, 9.4, 9.3, 9.2, 9.1}
  };

  SimpleRecord root = new SimpleRecord();
  for (int i = 0; i < flatInts.length; i++) {
    SimpleRecord occurrence = new SimpleRecord();
    occurrence.values.add(kv("flat-int", flatInts[i]));
    for (double value : repeatDoubles[i]) {
      occurrence.values.add(kv("repeat-double", value));
    }
    root.values.add(kv("repeat-group", occurrence));
  }

  String actual = JsonRecordFormatter.fromSchema(schema).formatRecord(root);
  String expected = asJsonString(
      obj(
          entry(
              "repeat-group",
              array(
                  obj(
                      entry("flat-int", 76543),
                      entry("repeat-double", array(1.2345, 5.6789, 10.11121314, 0.4321, 7.6543))),
                  obj(
                      entry("flat-int", 12345),
                      entry("repeat-double", array(1.1, 1.2, 1.3, 1.4, 1.5))),
                  obj(
                      entry("flat-int", 10293),
                      entry("repeat-double", array(9.5, 9.4, 9.3, 9.2, 9.1)))))));
  assertEquals(expected, actual);
}
use of org.apache.parquet.schema.PrimitiveType in project drill by axbaretto.
the class ParquetRecordWriter method getPrimitiveType.
/**
 * Builds the Parquet {@link PrimitiveType} for a field.
 *
 * <p>The primitive type name, original type and length are looked up from the field's minor
 * type, the repetition from its data mode, and the decimal metadata from the field itself,
 * all through {@code ParquetTypeHelper}.
 *
 * @param field field to describe
 * @return a primitive type carrying the field's name; the final constructor argument is
 *     passed as {@code null}
 */
private PrimitiveType getPrimitiveType(MaterializedField field) {
  final MinorType fieldMinorType = field.getType().getMinorType();
  final String fieldName = field.getName();

  final PrimitiveTypeName parquetTypeName = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(fieldMinorType);
  final Repetition parquetRepetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
  final OriginalType parquetOriginalType = ParquetTypeHelper.getOriginalTypeForMinorType(fieldMinorType);
  final DecimalMetadata parquetDecimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
  final int typeLength = ParquetTypeHelper.getLengthForMinorType(fieldMinorType);

  return new PrimitiveType(
      parquetRepetition,
      parquetTypeName,
      typeLength,
      fieldName,
      parquetOriginalType,
      parquetDecimalMetadata,
      null);
}
use of org.apache.parquet.schema.PrimitiveType in project drill by axbaretto.
the class Metadata method getColTypeInfo.
/**
 * Resolves the column type information for the primitive reached by following {@code path}.
 *
 * <p>Starting from {@code type}, each non-primitive type is cast to a group and the child
 * named {@code path[depth]} is taken, advancing {@code depth} by one per step, until a
 * primitive type is reached.
 *
 * @param schema schema queried for the maximum repetition and definition levels of
 *     {@code path}
 * @param type type to start descending from
 * @param path column path; element {@code depth} names the child to take at each step
 * @param depth index into {@code path} of the first child to look up
 * @return the primitive's original type, its decimal precision and scale (both 0 when it has
 *     no decimal metadata), and the maximum repetition and definition levels of {@code path}
 */
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  // Descend through the groups one path element at a time.
  Type current = type;
  int level = depth;
  while (!current.isPrimitive()) {
    current = ((GroupType) current).getType(path[level]);
    level++;
  }

  final PrimitiveType leaf = (PrimitiveType) current;
  final DecimalMetadata leafDecimal = leaf.getDecimalMetadata();
  final int precision = (leafDecimal != null) ? leafDecimal.getPrecision() : 0;
  final int scale = (leafDecimal != null) ? leafDecimal.getScale() : 0;

  final int maxRepetitionLevel = schema.getMaxRepetitionLevel(path);
  final int maxDefinitionLevel = schema.getMaxDefinitionLevel(path);
  return new ColTypeInfo(current.getOriginalType(), precision, scale, maxRepetitionLevel, maxDefinitionLevel);
}
Aggregations