Search in sources :

Example 11 with Field

use of org.apache.arrow.vector.types.pojo.Field in project parquet-mr by apache.

the class SchemaConverter method fromParquetGroup.

/**
 * Builds the Arrow mapping for a Parquet group.
 * <p>
 * A group without a logical type annotation becomes an Arrow struct whose children are the
 * mappings of the group's own fields. A group annotated as a list becomes an Arrow list whose
 * single child is the mapping of the element located through {@code List3Levels}.
 * In both cases the Arrow field is nullable exactly when the group's repetition is OPTIONAL.
 *
 * @param type the Parquet group to convert
 * @param name the name given to the resulting Arrow field (used instead of the group's own name)
 * @return the mapping between the Parquet group and the Arrow field created for it
 * @throws UnsupportedOperationException if the group carries a logical type annotation for which
 *         the visitor yields no mapping
 */
private TypeMapping fromParquetGroup(GroupType type, String name) {
    LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
    if (annotation != null) {
        // Annotated group: let the annotation dispatch to the visitor; only the list case is overridden here.
        Optional<TypeMapping> annotated = annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {

            @Override
            public Optional<TypeMapping> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
                List3Levels levels = new List3Levels(type);
                // The element is mapped without a name override and keeps its own repetition.
                TypeMapping elementMapping = fromParquet(levels.getElement(), null, levels.getElement().getRepetition());
                Field listField = new Field(name, type.isRepetition(OPTIONAL), new ArrowType.List(), asList(elementMapping.getArrowField()));
                return of(new ListTypeMapping(listField, levels, elementMapping));
            }
        });
        return annotated.orElseThrow(() -> new UnsupportedOperationException("Unsupported type " + type));
    }
    // Plain group: convert every child field and wrap the results in an Arrow struct.
    List<TypeMapping> childMappings = fromParquet(type.getFields());
    Field structField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(childMappings));
    return new StructTypeMapping(structField, type, childMappings);
}
Also used : ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) StructTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping) Struct(org.apache.arrow.vector.types.pojo.ArrowType.Struct) Field(org.apache.arrow.vector.types.pojo.Field) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) ListTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping) PrimitiveTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping) RepeatedTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping) UnionTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping) ListTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping) StructTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping) TypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping)

Example 12 with Field

use of org.apache.arrow.vector.types.pojo.Field in project parquet-mr by apache.

the class SchemaConverter method fromParquet.

/**
 * Creates an Arrow schema from a Parquet schema and returns the mapping between the two.
 *
 * @param parquetSchema the Parquet schema to convert
 * @return a mapping holding the generated Arrow schema, the original Parquet schema and the
 *         per-field mappings computed for the top-level fields
 */
public SchemaMapping fromParquet(MessageType parquetSchema) {
    // Convert the top-level Parquet fields first; the Arrow schema is derived from those mappings.
    List<TypeMapping> mappings = fromParquet(parquetSchema.getFields());
    Schema arrowSchema = new Schema(fields(mappings));
    return new SchemaMapping(arrowSchema, parquetSchema, mappings);
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) PrimitiveType(org.apache.parquet.schema.PrimitiveType) LogicalTypeAnnotation.decimalType(org.apache.parquet.schema.LogicalTypeAnnotation.decimalType) LogicalTypeAnnotation.dateType(org.apache.parquet.schema.LogicalTypeAnnotation.dateType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) LogicalTypeAnnotation.intType(org.apache.parquet.schema.LogicalTypeAnnotation.intType) LogicalTypeAnnotation.timeType(org.apache.parquet.schema.LogicalTypeAnnotation.timeType) LogicalTypeAnnotation.timestampType(org.apache.parquet.schema.LogicalTypeAnnotation.timestampType) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) LogicalTypeAnnotation.stringType(org.apache.parquet.schema.LogicalTypeAnnotation.stringType) Schema(org.apache.arrow.vector.types.pojo.Schema) PrimitiveTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping) RepeatedTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping) UnionTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping) ListTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping) StructTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping) TypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping)

Example 13 with Field

use of org.apache.arrow.vector.types.pojo.Field in project parquet-mr by apache.

the class SchemaConverter method map.

/**
 * Maps Arrow fields onto Parquet fields pairwise, by position.
 *
 * @param arrowFields the Arrow fields
 * @param parquetFields the Parquet fields; must contain as many entries as {@code arrowFields}
 * @return one mapping per position, in the same order as the inputs
 * @throws IllegalArgumentException if the two lists do not have the same size
 */
private List<TypeMapping> map(List<Field> arrowFields, List<Type> parquetFields) {
    final int fieldCount = arrowFields.size();
    if (fieldCount != parquetFields.size()) {
        throw new IllegalArgumentException("Can not map schemas as sizes differ: " + arrowFields + " != " + parquetFields);
    }
    List<TypeMapping> mappings = new ArrayList<>(fieldCount);
    for (int position = 0; position < fieldCount; position++) {
        // Fields are paired strictly by index; names are not compared here.
        mappings.add(map(arrowFields.get(position), parquetFields.get(position)));
    }
    return mappings;
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) PrimitiveType(org.apache.parquet.schema.PrimitiveType) LogicalTypeAnnotation.decimalType(org.apache.parquet.schema.LogicalTypeAnnotation.decimalType) LogicalTypeAnnotation.dateType(org.apache.parquet.schema.LogicalTypeAnnotation.dateType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) LogicalTypeAnnotation.intType(org.apache.parquet.schema.LogicalTypeAnnotation.intType) LogicalTypeAnnotation.timeType(org.apache.parquet.schema.LogicalTypeAnnotation.timeType) LogicalTypeAnnotation.timestampType(org.apache.parquet.schema.LogicalTypeAnnotation.timestampType) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) LogicalTypeAnnotation.stringType(org.apache.parquet.schema.LogicalTypeAnnotation.stringType) ArrayList(java.util.ArrayList) PrimitiveTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping) RepeatedTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping) UnionTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping) ListTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping) StructTypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping) TypeMapping(org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping) FloatingPoint(org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint)

Example 14 with Field

use of org.apache.arrow.vector.types.pojo.Field in project twister2 by DSC-SPIDAL.

the class RowSchema method toArrowSchema.

/**
 * Converts this row schema into an Arrow schema, producing one Arrow field per entry of
 * {@code types}, in iteration order.
 * <p>
 * Type translation: INTEGER, LONG and SHORT become signed integers of 32, 64 and 16 bits;
 * FLOAT and DOUBLE become single and double precision floating point; STRING and BYTE both
 * become {@code ArrowType.Binary}.
 *
 * @return the Arrow schema built from the translated fields
 * @throws Twister2RuntimeException if a field has a type outside the list above
 */
public org.apache.arrow.vector.types.pojo.Schema toArrowSchema() {
    List<Field> arrowFields = new ArrayList<>();
    for (TField column : types) {
        // Pick the Arrow type first; the Field wrapper is identical for every supported type.
        ArrowType arrowType;
        if (column.getType().equals(MessageTypes.INTEGER)) {
            arrowType = new ArrowType.Int(32, true);
        } else if (column.getType().equals(MessageTypes.LONG)) {
            arrowType = new ArrowType.Int(64, true);
        } else if (column.getType().equals(MessageTypes.SHORT)) {
            arrowType = new ArrowType.Int(16, true);
        } else if (column.getType().equals(MessageTypes.FLOAT)) {
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
        } else if (column.getType().equals(MessageTypes.DOUBLE)) {
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
        } else if (column.getType().equals(MessageTypes.STRING)) {
            // NOTE(review): strings are stored as raw binary rather than a UTF-8 type — presumably intentional; confirm.
            arrowType = new ArrowType.Binary();
        } else if (column.getType().equals(MessageTypes.BYTE)) {
            arrowType = new ArrowType.Binary();
        } else {
            throw new Twister2RuntimeException("Un-known type");
        }
        // Every field is created with nullable = false and null for the remaining FieldType/Field arguments.
        arrowFields.add(new Field(column.getName(), new FieldType(false, arrowType, null), null));
    }
    return new org.apache.arrow.vector.types.pojo.Schema(arrowFields);
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) TField(edu.iu.dsc.tws.common.table.TField) ArrayList(java.util.ArrayList) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) FieldType(org.apache.arrow.vector.types.pojo.FieldType) Field(org.apache.arrow.vector.types.pojo.Field) TField(edu.iu.dsc.tws.common.table.TField)

Example 15 with Field

use of org.apache.arrow.vector.types.pojo.Field in project twister2 by DSC-SPIDAL.

the class ArrowAllToAll method loadBuffers.

/**
 * Loads the buffers of one field into its vector, then recurses into the field's children.
 * <p>
 * One field node is taken from {@code nodes} and as many buffers as
 * {@code TypeLayout.getTypeBufferCount} reports for the field's type are taken from
 * {@code buffers}. Both iterators are shared across the recursion, so they are expected to
 * supply their elements in the same depth-first order in which fields are visited here.
 *
 * @param vector the vector to load
 * @param field the schema field describing {@code vector}
 * @param buffers source of buffers; advanced by this call and by the recursive calls
 * @param nodes source of field nodes; advanced by this call and by the recursive calls
 * @throws IllegalArgumentException if the vector rejects the buffers with a runtime exception
 *         (the original exception is kept as the cause); the {@code checkArgument} calls
 *         presumably raise the same type when no field node is left or when the number of
 *         child vectors differs from the number of child fields — confirm against the
 *         imported {@code checkArgument}
 */
private void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers, Iterator<ArrowFieldNode> nodes) {
    checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector);
    ArrowFieldNode fieldNode = nodes.next();
    int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType());
    List<ArrowBuf> ownBuffers = new ArrayList<>(bufferLayoutCount);
    for (int j = 0; j < bufferLayoutCount; j++) {
        // No hasNext() check: running out of buffers surfaces as the iterator's own exception.
        ownBuffers.add(buffers.next());
    }
    try {
        vector.loadFieldBuffers(fieldNode, ownBuffers);
    } catch (RuntimeException e) {
        // Re-wrap so the failing field is named in the message; the cause is preserved.
        throw new IllegalArgumentException("Could not load buffers for field " + field + ". error message: " + e.getMessage(), e);
    }
    List<Field> children = field.getChildren();
    if (!children.isEmpty()) {
        List<FieldVector> childrenFromFields = vector.getChildrenFromFields();
        checkArgument(children.size() == childrenFromFields.size(), "should have as many children as in the schema: found %s expected %s", childrenFromFields.size(), children.size());
        for (int i = 0; i < childrenFromFields.size(); i++) {
            Field child = children.get(i);
            FieldVector fieldVector = childrenFromFields.get(i);
            loadBuffers(fieldVector, child, buffers, nodes);
        }
    }
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) ArrowBuf(io.netty.buffer.ArrowBuf) ArrayList(java.util.ArrayList) ArrowFieldNode(org.apache.arrow.vector.ipc.message.ArrowFieldNode) FieldVector(org.apache.arrow.vector.FieldVector)

Aggregations

Field (org.apache.arrow.vector.types.pojo.Field)18 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)10 ArrayList (java.util.ArrayList)6 ListTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping)5 PrimitiveTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping)5 RepeatedTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping)5 StructTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping)5 TypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping)5 UnionTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping)5 Schema (org.apache.arrow.vector.types.pojo.Schema)4 FieldVector (org.apache.arrow.vector.FieldVector)3 FieldType (org.apache.arrow.vector.types.pojo.FieldType)3 MessageType (org.apache.parquet.schema.MessageType)3 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)2 TField (edu.iu.dsc.tws.common.table.TField)2 Attribute (edu.uci.ics.texera.api.schema.Attribute)2 Schema (edu.uci.ics.texera.api.schema.Schema)2 Float8Vector (org.apache.arrow.vector.Float8Vector)2 IntVector (org.apache.arrow.vector.IntVector)2 GroupType (org.apache.parquet.schema.GroupType)2