Use of org.apache.parquet.schema.PrimitiveType in the Apache parquet-mr project.
Class TestAvroSchemaConverter, method testTimestampMillisType.
/**
 * Checks the timestamp-millis logical type: an Avro {@code long} tagged with it must
 * round-trip against a Parquet {@code int64} annotated TIMESTAMP(MILLIS,true), and
 * converting a TIMESTAMP_MILLIS annotation on any of the other listed primitives
 * must fail with {@link IllegalArgumentException}.
 */
@Test
public void testTimestampMillisType() throws Exception {
  Schema timestampSchema = LogicalTypes.timestampMillis().addToSchema(Schema.create(LONG));
  Schema.Field timestampField = new Schema.Field("timestamp", timestampSchema, null, null);
  Schema expectedRecord = Schema.createRecord("myrecord", null, null, false, Arrays.asList(timestampField));
  String parquetSchema = "message myrecord {\n" + " required int64 timestamp (TIMESTAMP(MILLIS,true));\n" + "}\n";
  testRoundTripConversion(expectedRecord, parquetSchema);

  // Primitives on which the converter is expected to refuse the annotation.
  PrimitiveTypeName[] rejectedPrimitives = { INT32, INT96, FLOAT, DOUBLE, BOOLEAN, BINARY, FIXED_LEN_BYTE_ARRAY };
  for (PrimitiveTypeName primitive : rejectedPrimitives) {
    // The FIXED_LEN_BYTE_ARRAY case is built with an explicit length of 12.
    final PrimitiveType type = (primitive == FIXED_LEN_BYTE_ARRAY)
        ? new PrimitiveType(REQUIRED, primitive, 12, "test", TIMESTAMP_MILLIS)
        : new PrimitiveType(REQUIRED, primitive, "test", TIMESTAMP_MILLIS);
    assertThrows(
        "Should not allow TIMESTAMP_MILLIS with " + primitive,
        IllegalArgumentException.class,
        () -> new AvroSchemaConverter().convert(message(type)));
  }
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache parquet-mr project.
Class TestAvroSchemaConverter, method testTimestampMicrosType.
/**
 * Checks the timestamp-micros logical type: an Avro {@code long} tagged with it must
 * round-trip against a Parquet {@code int64} annotated TIMESTAMP(MICROS,true), and
 * converting a TIMESTAMP_MICROS annotation on any of the other listed primitives
 * must fail with {@link IllegalArgumentException}.
 */
@Test
public void testTimestampMicrosType() throws Exception {
  Schema timestampSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(LONG));
  Schema.Field timestampField = new Schema.Field("timestamp", timestampSchema, null, null);
  Schema expectedRecord = Schema.createRecord("myrecord", null, null, false, Arrays.asList(timestampField));
  String parquetSchema = "message myrecord {\n" + " required int64 timestamp (TIMESTAMP(MICROS,true));\n" + "}\n";
  testRoundTripConversion(expectedRecord, parquetSchema);

  // Primitives on which the converter is expected to refuse the annotation.
  PrimitiveTypeName[] rejectedPrimitives = { INT32, INT96, FLOAT, DOUBLE, BOOLEAN, BINARY, FIXED_LEN_BYTE_ARRAY };
  for (PrimitiveTypeName primitive : rejectedPrimitives) {
    // The FIXED_LEN_BYTE_ARRAY case is built with an explicit length of 12.
    final PrimitiveType type = (primitive == FIXED_LEN_BYTE_ARRAY)
        ? new PrimitiveType(REQUIRED, primitive, 12, "test", TIMESTAMP_MICROS)
        : new PrimitiveType(REQUIRED, primitive, "test", TIMESTAMP_MICROS);
    assertThrows(
        "Should not allow TIMESTAMP_MICROS with " + primitive,
        IllegalArgumentException.class,
        () -> new AvroSchemaConverter().convert(message(type)));
  }
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache parquet-mr project.
Class AvroSchemaConverter, method convertField (Avro schema to Parquet type).
/**
 * Translates one Avro schema node into a Parquet {@link Type}.
 *
 * <p>Primitive-like Avro types (boolean, int, long, float, double, bytes, string,
 * enum, fixed) go through a primitive builder that may afterwards receive a logical
 * type annotation. Records, arrays, maps and unions are returned directly as the
 * corresponding nested Parquet structures.
 *
 * @param fieldName  name given to the resulting Parquet type
 * @param schema     Avro schema to translate
 * @param repetition repetition of the resulting Parquet type
 * @param schemaPath path of this field, looked up in {@code pathsToInt96} for fixed types
 * @return the Parquet type for {@code schema}
 * @throws IllegalArgumentException if a fixed field selected for INT96 is not 12 bytes
 * @throws UnsupportedOperationException if the Avro type has no branch here
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition, String schemaPath) {
  final Schema.Type type = schema.getType();
  final LogicalType logicalType = schema.getLogicalType();
  Types.PrimitiveBuilder<PrimitiveType> builder;

  // Case labels name Avro Schema.Type constants; the arguments passed to
  // Types.primitive(...) are the Parquet primitive type names.
  switch (type) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      builder = Types.primitive(INT64, repetition);
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING: {
      // A uuid-tagged string becomes a fixed-length array only when writeParquetUUID is set.
      boolean storeAsUuid = logicalType != null
          && logicalType.getName().equals(LogicalTypes.uuid().getName())
          && writeParquetUUID;
      if (storeAsUuid) {
        builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
            .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
      } else {
        builder = Types.primitive(BINARY, repetition).as(stringType());
      }
      break;
    }
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields(), schemaPath));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(enumType());
      break;
    case ARRAY: {
      Schema elementSchema = schema.getElementType();
      if (writeOldListStructure) {
        return ConversionPatterns.listType(
            repetition, fieldName, convertField("array", elementSchema, REPEATED, schemaPath));
      }
      return ConversionPatterns.listOfElements(
          repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, elementSchema, schemaPath));
    }
    case MAP:
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(
          repetition, fieldName, convertField("value", schema.getValueType(), schemaPath));
    case FIXED:
      if (pathsToInt96.contains(schemaPath)) {
        if (schema.getFixedSize() != 12) {
          throw new IllegalArgumentException("The size of the fixed type field " + schemaPath + " must be 12 bytes for INT96 conversion");
        }
        builder = Types.primitive(PrimitiveTypeName.INT96, repetition);
      } else {
        builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
      }
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition, schemaPath);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // Attach a Parquet annotation when the Avro logical type maps to one.
  if (logicalType instanceof LogicalTypes.Decimal) {
    LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
    builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision()));
  } else if (logicalType != null) {
    LogicalTypeAnnotation annotation = convertLogicalType(logicalType);
    if (annotation != null) {
      builder.as(annotation);
    }
  }
  return builder.named(fieldName);
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache parquet-mr project.
Class AvroSchemaConverter, method convertField (Parquet type to Avro schema).
/**
 * Translates one Parquet {@link Type} into the equivalent Avro {@link Schema}.
 *
 * <p>Primitive types are mapped by primitive type name; a logical type derived from
 * the original-type annotation is then added when one applies (for DECIMAL, only on
 * BINARY and FIXED_LEN_BYTE_ARRAY). Group types are mapped according to their
 * original-type annotation: LIST to an Avro array, MAP / MAP_KEY_VALUE to an Avro map,
 * ENUM to an Avro string; a group with no annotation is treated as a record.
 *
 * @param parquetType the Parquet type to translate
 * @return the Avro schema for {@code parquetType}
 * @throws IllegalArgumentException if the type is INT96, or a map key is not a
 *         binary primitive annotated as UTF8 (including a key with no annotation)
 * @throws UnsupportedOperationException if a LIST or MAP group is malformed, or the
 *         group carries an annotation that has no Avro equivalent here
 */
private Schema convertField(final Type parquetType) {
  if (parquetType.isPrimitive()) {
    final PrimitiveType asPrimitive = parquetType.asPrimitiveType();
    final PrimitiveTypeName parquetPrimitiveTypeName = asPrimitive.getPrimitiveTypeName();
    final OriginalType annotation = parquetType.getOriginalType();
    Schema schema = parquetPrimitiveTypeName.convert(new PrimitiveType.PrimitiveTypeNameConverter<Schema, RuntimeException>() {
      @Override
      public Schema convertBOOLEAN(PrimitiveTypeName primitiveTypeName) {
        return Schema.create(Schema.Type.BOOLEAN);
      }

      @Override
      public Schema convertINT32(PrimitiveTypeName primitiveTypeName) {
        return Schema.create(Schema.Type.INT);
      }

      @Override
      public Schema convertINT64(PrimitiveTypeName primitiveTypeName) {
        return Schema.create(Schema.Type.LONG);
      }

      @Override
      public Schema convertINT96(PrimitiveTypeName primitiveTypeName) {
        throw new IllegalArgumentException("INT96 not yet implemented.");
      }

      @Override
      public Schema convertFLOAT(PrimitiveTypeName primitiveTypeName) {
        return Schema.create(Schema.Type.FLOAT);
      }

      @Override
      public Schema convertDOUBLE(PrimitiveTypeName primitiveTypeName) {
        return Schema.create(Schema.Type.DOUBLE);
      }

      @Override
      public Schema convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) {
        // The Avro fixed schema is named after the Parquet field and keeps its byte length.
        int size = parquetType.asPrimitiveType().getTypeLength();
        return Schema.createFixed(parquetType.getName(), null, null, size);
      }

      @Override
      public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
        // UTF8- and ENUM-annotated binaries become strings; anything else is raw bytes.
        if (annotation == OriginalType.UTF8 || annotation == OriginalType.ENUM) {
          return Schema.create(Schema.Type.STRING);
        } else {
          return Schema.create(Schema.Type.BYTES);
        }
      }
    });
    LogicalType logicalType = convertOriginalType(annotation, asPrimitive.getDecimalMetadata());
    // A DECIMAL logical type is only attached to BINARY and FIXED_LEN_BYTE_ARRAY primitives.
    if (logicalType != null && (annotation != DECIMAL || parquetPrimitiveTypeName == BINARY || parquetPrimitiveTypeName == FIXED_LEN_BYTE_ARRAY)) {
      schema = logicalType.addToSchema(schema);
    }
    return schema;
  } else {
    GroupType parquetGroupType = parquetType.asGroupType();
    OriginalType originalType = parquetGroupType.getOriginalType();
    if (originalType != null) {
      switch (originalType) {
        case LIST:
          if (parquetGroupType.getFieldCount() != 1) {
            throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
          }
          Type repeatedType = parquetGroupType.getType(0);
          if (!repeatedType.isRepetition(REPEATED)) {
            throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
          }
          if (isElementType(repeatedType, parquetGroupType.getName())) {
            // repeated element types are always required
            return Schema.createArray(convertField(repeatedType));
          } else {
            // Otherwise the repeated group wraps the element as its first field.
            Type elementType = repeatedType.asGroupType().getType(0);
            if (elementType.isRepetition(Type.Repetition.OPTIONAL)) {
              return Schema.createArray(optional(convertField(elementType)));
            } else {
              return Schema.createArray(convertField(elementType));
            }
          }
        // MAP_KEY_VALUE is accepted for backward-compatibility
        case MAP_KEY_VALUE:
        case MAP:
          if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) {
            throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
          }
          GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
          if (!mapKeyValType.isRepetition(REPEATED) || mapKeyValType.getFieldCount() != 2) {
            throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
          }
          Type keyType = mapKeyValType.getType(0);
          // getOriginalType() may be null for an un-annotated key, so compare by enum
          // identity; such a key is rejected with the descriptive error below instead
          // of failing with a NullPointerException.
          if (!keyType.isPrimitive()
              || keyType.asPrimitiveType().getPrimitiveTypeName() != PrimitiveTypeName.BINARY
              || keyType.getOriginalType() != OriginalType.UTF8) {
            throw new IllegalArgumentException("Map key type must be binary (UTF8): " + keyType);
          }
          Type valueType = mapKeyValType.getType(1);
          if (valueType.isRepetition(Type.Repetition.OPTIONAL)) {
            return Schema.createMap(optional(convertField(valueType)));
          } else {
            return Schema.createMap(convertField(valueType));
          }
        case ENUM:
          return Schema.create(Schema.Type.STRING);
        case UTF8:
        default:
          throw new UnsupportedOperationException("Cannot convert Parquet type " + parquetType);
      }
    } else {
      // if no original type then it's a record
      return convertFields(parquetGroupType.getName(), parquetGroupType.getFields());
    }
  }
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache parquet-mr project.
Class TestJsonRecordFormatter, method testFlatSchemaWithArrays.
/**
 * Formats a flat record whose schema has one required, one optional and two repeated
 * primitive fields, and checks that the repeated values are emitted as JSON arrays.
 */
@Test
public void testFlatSchemaWithArrays() throws Exception {
  SimpleRecord record = new SimpleRecord();

  PrimitiveType reqdField = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, "reqd");
  PrimitiveType optField = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.DOUBLE, "opt");
  PrimitiveType oddField = new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.INT32, "odd");
  PrimitiveType evenField = new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.INT64, "even");
  MessageType schema = new MessageType("schema", reqdField, optField, oddField, evenField);

  record.values.add(kv("reqd", "a required value"));
  record.values.add(kv("opt", 1.2345));
  // Five values per repeated field: 1,3,5,7,9 followed by 2,4,6,8,10.
  for (int odd = 1; odd <= 9; odd += 2) {
    record.values.add(kv("odd", odd));
  }
  for (int even = 2; even <= 10; even += 2) {
    record.values.add(kv("even", even));
  }

  String expected = asJsonString(
      obj(
          entry("reqd", "a required value"),
          entry("opt", 1.2345),
          entry("odd", array(1, 3, 5, 7, 9)),
          entry("even", array(2, 4, 6, 8, 10))));
  String actual = JsonRecordFormatter.fromSchema(schema).formatRecord(record);
  assertEquals(expected, actual);
}
Aggregations