Use of org.apache.parquet.schema.Type in project parquet-mr by Apache.
Class AvroSchemaConverter, method convertField.
/**
 * Converts one Avro schema into the Parquet type that represents it.
 *
 * <p>Records, arrays, maps and unions are converted and returned directly.
 * Every other supported Avro type starts a primitive builder, which is then
 * annotated from the schema's logical type (if any) and named.
 *
 * @param fieldName  name given to the resulting Parquet type
 * @param schema     Avro schema to convert
 * @param repetition repetition applied to the resulting Parquet type
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
    final Types.PrimitiveBuilder<PrimitiveType> primitive;
    final Schema.Type avroType = schema.getType();

    // Case labels name Avro Schema.Type constants; the identifiers used inside
    // the case bodies are the Parquet constants already in scope.
    switch (avroType) {
        case BOOLEAN:
            primitive = Types.primitive(BOOLEAN, repetition);
            break;
        case INT:
            primitive = Types.primitive(INT32, repetition);
            break;
        case LONG:
            primitive = Types.primitive(INT64, repetition);
            break;
        case FLOAT:
            primitive = Types.primitive(FLOAT, repetition);
            break;
        case DOUBLE:
            primitive = Types.primitive(DOUBLE, repetition);
            break;
        case BYTES:
            primitive = Types.primitive(BINARY, repetition);
            break;
        case STRING:
            primitive = Types.primitive(BINARY, repetition).as(UTF8);
            break;
        case RECORD:
            return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
        case ENUM:
            primitive = Types.primitive(BINARY, repetition).as(ENUM);
            break;
        case ARRAY:
            if (writeOldListStructure) {
                return ConversionPatterns.listType(
                    repetition, fieldName, convertField("array", schema.getElementType(), REPEATED));
            }
            return ConversionPatterns.listOfElements(
                repetition, fieldName,
                convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
        case MAP: {
            // avro map key type is always string
            final Type mapValueType = convertField("value", schema.getValueType());
            return ConversionPatterns.stringKeyMapType(repetition, fieldName, mapValueType);
        }
        case FIXED:
            primitive = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
            break;
        case UNION:
            return convertUnion(fieldName, schema, repetition);
        default:
            throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
    }

    // Schema translation can only be done for known logical types because this
    // creates an equivalence.
    final LogicalType logical = schema.getLogicalType();
    if (logical instanceof LogicalTypes.Decimal) {
        return primitive
            .as(DECIMAL)
            .precision(((LogicalTypes.Decimal) logical).getPrecision())
            .scale(((LogicalTypes.Decimal) logical).getScale())
            .named(fieldName);
    }
    if (logical != null) {
        final OriginalType mapped = convertLogicalType(logical);
        if (mapped != null) {
            // The builder is annotated in place; the returned reference is not needed.
            primitive.as(mapped);
        }
    }
    return primitive.named(fieldName);
}
Use of org.apache.parquet.schema.Type in project parquet-mr by Apache.
Class AvroSchemaConverter, method convertField.
/**
 * Converts a Parquet type into the Avro schema that represents it.
 *
 * <p>Primitive types map to the matching Avro primitive (BINARY becomes a
 * string when annotated UTF8 or ENUM, bytes otherwise) and are then decorated
 * with a logical type when one can be derived from the annotation. Group types
 * are converted according to their annotation: LIST becomes an Avro array,
 * MAP / MAP_KEY_VALUE becomes an Avro map, ENUM becomes a string, and an
 * un-annotated group is converted as a record.
 *
 * @param parquetType the Parquet type to convert
 * @return the equivalent Avro schema
 * @throws UnsupportedOperationException if a list or map group is malformed,
 *         or the group carries an annotation that has no conversion
 * @throws IllegalArgumentException if the type is INT96, or a map key is not
 *         a BINARY primitive annotated as UTF8
 */
private Schema convertField(final Type parquetType) {
    if (parquetType.isPrimitive()) {
        final PrimitiveType asPrimitive = parquetType.asPrimitiveType();
        final PrimitiveTypeName parquetPrimitiveTypeName = asPrimitive.getPrimitiveTypeName();
        final OriginalType annotation = parquetType.getOriginalType();
        Schema schema = parquetPrimitiveTypeName.convert(
            new PrimitiveType.PrimitiveTypeNameConverter<Schema, RuntimeException>() {
                @Override
                public Schema convertBOOLEAN(PrimitiveTypeName primitiveTypeName) {
                    return Schema.create(Schema.Type.BOOLEAN);
                }

                @Override
                public Schema convertINT32(PrimitiveTypeName primitiveTypeName) {
                    return Schema.create(Schema.Type.INT);
                }

                @Override
                public Schema convertINT64(PrimitiveTypeName primitiveTypeName) {
                    return Schema.create(Schema.Type.LONG);
                }

                @Override
                public Schema convertINT96(PrimitiveTypeName primitiveTypeName) {
                    throw new IllegalArgumentException("INT96 not yet implemented.");
                }

                @Override
                public Schema convertFLOAT(PrimitiveTypeName primitiveTypeName) {
                    return Schema.create(Schema.Type.FLOAT);
                }

                @Override
                public Schema convertDOUBLE(PrimitiveTypeName primitiveTypeName) {
                    return Schema.create(Schema.Type.DOUBLE);
                }

                @Override
                public Schema convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) {
                    // The Avro fixed schema takes its name and size from the Parquet field.
                    int size = parquetType.asPrimitiveType().getTypeLength();
                    return Schema.createFixed(parquetType.getName(), null, null, size);
                }

                @Override
                public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
                    if (annotation == OriginalType.UTF8 || annotation == OriginalType.ENUM) {
                        return Schema.create(Schema.Type.STRING);
                    } else {
                        return Schema.create(Schema.Type.BYTES);
                    }
                }
            });

        LogicalType logicalType = convertOriginalType(annotation, asPrimitive.getDecimalMetadata());
        // A DECIMAL annotation is only carried over when the physical type is
        // BINARY or FIXED_LEN_BYTE_ARRAY; any other logical type is always attached.
        if (logicalType != null
            && (annotation != DECIMAL
                || parquetPrimitiveTypeName == BINARY
                || parquetPrimitiveTypeName == FIXED_LEN_BYTE_ARRAY)) {
            schema = logicalType.addToSchema(schema);
        }
        return schema;
    } else {
        GroupType parquetGroupType = parquetType.asGroupType();
        OriginalType originalType = parquetGroupType.getOriginalType();
        if (originalType != null) {
            switch (originalType) {
                case LIST:
                    if (parquetGroupType.getFieldCount() != 1) {
                        throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
                    }
                    Type repeatedType = parquetGroupType.getType(0);
                    if (!repeatedType.isRepetition(REPEATED)) {
                        throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
                    }
                    if (isElementType(repeatedType, parquetGroupType.getName())) {
                        // repeated element types are always required
                        return Schema.createArray(convertField(repeatedType));
                    } else {
                        // The repeated group is only a wrapper; its first field is the element.
                        Type elementType = repeatedType.asGroupType().getType(0);
                        if (elementType.isRepetition(Type.Repetition.OPTIONAL)) {
                            return Schema.createArray(optional(convertField(elementType)));
                        } else {
                            return Schema.createArray(convertField(elementType));
                        }
                    }
                // for backward-compatibility
                case MAP_KEY_VALUE:
                case MAP:
                    if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) {
                        throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
                    }
                    GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
                    if (!mapKeyValType.isRepetition(REPEATED) || mapKeyValType.getFieldCount() != 2) {
                        throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
                    }
                    Type keyType = mapKeyValType.getType(0);
                    // Identity comparison on the annotation is null-safe: a BINARY key
                    // without any annotation is rejected with the IllegalArgumentException
                    // below instead of failing with a NullPointerException.
                    if (!keyType.isPrimitive()
                        || !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveTypeName.BINARY)
                        || keyType.getOriginalType() != OriginalType.UTF8) {
                        throw new IllegalArgumentException("Map key type must be binary (UTF8): " + keyType);
                    }
                    Type valueType = mapKeyValType.getType(1);
                    if (valueType.isRepetition(Type.Repetition.OPTIONAL)) {
                        return Schema.createMap(optional(convertField(valueType)));
                    } else {
                        return Schema.createMap(convertField(valueType));
                    }
                case ENUM:
                    return Schema.create(Schema.Type.STRING);
                case UTF8:
                default:
                    throw new UnsupportedOperationException("Cannot convert Parquet type " + parquetType);
            }
        } else {
            // if no original type then it's a record
            return convertFields(parquetGroupType.getName(), parquetGroupType.getFields());
        }
    }
}
Use of org.apache.parquet.schema.Type in project parquet-mr by Apache.
Class ColumnReadStoreImpl, method getPrimitiveConverter.
/**
 * Finds the primitive converter for a column by following the column's path
 * through the schema and the record converter in parallel.
 *
 * @param path descriptor whose path names the fields to follow, outermost first
 * @return the converter reached at the end of the path, as a primitive converter
 */
private PrimitiveConverter getPrimitiveConverter(ColumnDescriptor path) {
    Type typeCursor = schema;
    Converter converterCursor = recordConverter;
    for (String segment : path.getPath()) {
        final GroupType parentGroup = typeCursor.asGroupType();
        // The converter is looked up by position, so resolve the field's index first.
        final int position = parentGroup.getFieldIndex(segment);
        typeCursor = parentGroup.getType(segment);
        converterCursor = converterCursor.asGroupConverter().getConverter(position);
    }
    return converterCursor.asPrimitiveConverter();
}
Use of org.apache.parquet.schema.Type in project parquet-mr by Apache.
Class DataWritableWriter, method writeArray.
/**
 * Writes the values of an array to the record consumer, one Parquet field at a time.
 *
 * <p>For each field of {@code type} the field is started, every non-null value
 * of the array is written (as a primitive or as a nested group, depending on
 * the field's type), and the field is ended. A {@code null} array writes nothing.
 *
 * @param array the values to write; may be {@code null}
 * @param type  the group type whose fields drive the writing
 * @throws RuntimeException if a group-typed field holds a value that is not an
 *         {@code ArrayWritable}
 */
private void writeArray(final ArrayWritable array, final GroupType type) {
    if (array == null) {
        return;
    }
    final Writable[] elements = array.get();
    final int numFields = type.getFieldCount();
    for (int fieldIndex = 0; fieldIndex < numFields; ++fieldIndex) {
        final Type fieldType = type.getType(fieldIndex);
        recordConsumer.startField(fieldType.getName(), fieldIndex);
        for (final Writable element : elements) {
            if (element == null) {
                // Null entries are skipped without writing anything.
                continue;
            }
            if (fieldType.isPrimitive()) {
                // NOTE(review): a wrapped value is indexed by the field position;
                // the original author questioned whether this should be 0 - confirm.
                final Writable primitiveValue = (element instanceof ArrayWritable)
                    ? ((ArrayWritable) element).get()[fieldIndex]
                    : element;
                writePrimitive(primitiveValue);
            } else if (element instanceof ArrayWritable) {
                recordConsumer.startGroup();
                writeData((ArrayWritable) element, fieldType.asGroupType());
                recordConsumer.endGroup();
            } else {
                throw new RuntimeException("This should be a ArrayWritable: " + element);
            }
        }
        recordConsumer.endField(fieldType.getName(), fieldIndex);
    }
}
Use of org.apache.parquet.schema.Type in project parquet-mr by Apache.
Class ProtoSchemaConverter, method addField.
/**
 * Starts the Parquet field that corresponds to a Protocol Buffer field descriptor.
 *
 * <p>Scalar Java types map to a primitive builder (strings are annotated UTF8,
 * enums are annotated ENUM, both on BINARY). Message fields become a nested
 * group whose fields are converted from the message type's fields.
 *
 * @param descriptor the Protocol Buffer field to convert
 * @param builder    the group builder the new field is added to
 * @param <T>        the parent type of {@code builder}
 * @return the builder for the newly started field
 * @throws UnsupportedOperationException if the descriptor's Java type is not handled
 */
private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addField(Descriptors.FieldDescriptor descriptor, GroupBuilder<T> builder) {
    final Type.Repetition fieldRepetition = getRepetition(descriptor);
    final JavaType protoJavaType = descriptor.getJavaType();

    // Single exit point: each arm either assigns the field builder or throws.
    final Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> fieldBuilder;
    switch (protoJavaType) {
        case BOOLEAN:
            fieldBuilder = builder.primitive(BOOLEAN, fieldRepetition);
            break;
        case INT:
            fieldBuilder = builder.primitive(INT32, fieldRepetition);
            break;
        case LONG:
            fieldBuilder = builder.primitive(INT64, fieldRepetition);
            break;
        case FLOAT:
            fieldBuilder = builder.primitive(FLOAT, fieldRepetition);
            break;
        case DOUBLE:
            fieldBuilder = builder.primitive(DOUBLE, fieldRepetition);
            break;
        case BYTE_STRING:
            fieldBuilder = builder.primitive(BINARY, fieldRepetition);
            break;
        case STRING:
            fieldBuilder = builder.primitive(BINARY, fieldRepetition).as(UTF8);
            break;
        case MESSAGE: {
            final GroupBuilder<GroupBuilder<T>> nestedGroup = builder.group(fieldRepetition);
            convertFields(nestedGroup, descriptor.getMessageType().getFields());
            fieldBuilder = nestedGroup;
            break;
        }
        case ENUM:
            fieldBuilder = builder.primitive(BINARY, fieldRepetition).as(ENUM);
            break;
        default:
            throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + protoJavaType);
    }
    return fieldBuilder;
}
Aggregations