Search in sources :

Example 16 with Field

use of org.apache.arrow.vector.types.pojo.Field in project twister2 by DSC-SPIDAL.

the class Twister2ArrowFileWriter method setUpTwister2ArrowWrite.

/**
 * Prepares this writer for output.
 *
 * <p>Builds the vector schema root from the JSON schema, opens an output stream on the
 * target file, creates the Arrow file writer, and registers a value generator for each
 * field vector in the schema.
 *
 * @param workerId id of the worker; only used in the log message
 * @return {@code true} once setup has completed
 * @throws Exception declared by the method; raised by the underlying setup calls
 * @throws RuntimeException if a field vector's minor type is not INT, BIGINT or FLOAT4
 */
public boolean setUpTwister2ArrowWrite(int workerId) throws Exception {
    LOG.fine("%%%%%%%%% worker id details:" + workerId + "\t" + arrowFile);
    this.root = VectorSchemaRoot.create(Schema.fromJSON(arrowSchema), this.rootAllocator);

    // Open the destination file and wrap its stream for Arrow output.
    Path path = new Path(arrowFile);
    this.fileSystem = FileSystemUtils.get(path);
    this.fsDataOutputStream = fileSystem.create(path);
    this.twister2ArrowOutputStream = new Twister2ArrowOutputStream(this.fsDataOutputStream);

    DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
    // The flag selects the write target: the wrapper stream when set, the raw channel otherwise.
    if (flag) {
        this.arrowFileWriter = new ArrowFileWriter(root, provider, this.twister2ArrowOutputStream);
    } else {
        this.arrowFileWriter = new ArrowFileWriter(root, provider, this.fsDataOutputStream.getChannel());
    }

    LOG.info("root schema fields:" + root.getSchema().getFields());
    // Register one generator per field vector, chosen by the vector's minor type.
    for (Field field : root.getSchema().getFields()) {
        FieldVector vector = root.getVector(field.getName());
        switch (vector.getMinorType()) {
            case INT:
                this.generatorMap.put(vector, new IntVectorGenerator());
                break;
            case BIGINT:
                this.generatorMap.put(vector, new BigIntVectorGenerator());
                break;
            case FLOAT4:
                this.generatorMap.put(vector, new FloatVectorGenerator());
                break;
            default:
                throw new RuntimeException("unsupported arrow write type");
        }
    }
    return true;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Field(org.apache.arrow.vector.types.pojo.Field) DictionaryProvider(org.apache.arrow.vector.dictionary.DictionaryProvider) ArrowFileWriter(org.apache.arrow.vector.ipc.ArrowFileWriter) FieldVector(org.apache.arrow.vector.FieldVector)

Example 17 with Field

use of org.apache.arrow.vector.types.pojo.Field in project parquet-mr by apache.

the class TestSchemaConverter method compareFields.

/**
 * Compares two field lists element by element, descending into children first so that
 * a failure points at the most deeply nested difference.
 *
 * @param left the expected fields
 * @param right the actual fields
 */
private void compareFields(List<Field> left, List<Field> right) {
    Assert.assertEquals(left + "\n" + right, left.size(), right.size());
    int position = 0;
    for (Field expected : left) {
        Field actual = right.get(position);
        position++;
        // Check the children before the parent for a more precise failure.
        compareFields(expected.getChildren(), actual.getChildren());
        Assert.assertEquals(expected, actual);
    }
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field)

Example 18 with Field

use of org.apache.arrow.vector.types.pojo.Field in project carbondata by apache.

the class ArrowWriter method createFieldWriter.

/**
 * Creates the field writer that matches the concrete type of the given vector.
 *
 * <p>List and struct vectors are handled recursively: a writer is first created for the
 * list's data vector or for each struct child, then wrapped in the container writer.
 *
 * @param valueVector the vector to create a writer for
 * @return a writer bound to {@code valueVector}
 * @throws UnsupportedOperationException if the vector type has no matching writer
 */
private static ArrowFieldWriter createFieldWriter(ValueVector valueVector) {
    // The checks run in a fixed order; keep it when adding new vector types.
    if (valueVector instanceof BitVector) {
        return new BooleanWriter((BitVector) valueVector);
    }
    if (valueVector instanceof TinyIntVector) {
        return new ByteWriter((TinyIntVector) valueVector);
    }
    if (valueVector instanceof SmallIntVector) {
        return new ShortWriter((SmallIntVector) valueVector);
    }
    if (valueVector instanceof IntVector) {
        return new IntWriter((IntVector) valueVector);
    }
    if (valueVector instanceof BigIntVector) {
        return new LongWriter((BigIntVector) valueVector);
    }
    if (valueVector instanceof DecimalVector) {
        // Precision and scale come from the vector's own field type.
        DecimalVector decimalVector = (DecimalVector) valueVector;
        ArrowType.Decimal decimalType = (ArrowType.Decimal) decimalVector.getField().getType();
        return new DecimalWriter(decimalVector, decimalType.getPrecision(), decimalType.getScale());
    }
    if (valueVector instanceof VarCharVector) {
        return new StringWriter((VarCharVector) valueVector);
    }
    if (valueVector instanceof Float4Vector) {
        return new FloatWriter((Float4Vector) valueVector);
    }
    if (valueVector instanceof Float8Vector) {
        return new DoubleWriter((Float8Vector) valueVector);
    }
    if (valueVector instanceof ListVector) {
        ListVector listVector = (ListVector) valueVector;
        ArrowFieldWriter elementWriter = createFieldWriter(listVector.getDataVector());
        return new ArrayWriter(listVector, elementWriter);
    }
    if (valueVector instanceof StructVector) {
        StructVector structVector = (StructVector) valueVector;
        // One child writer per struct child, in ordinal order.
        ArrowFieldWriter[] childWriters = new ArrowFieldWriter[structVector.size()];
        for (int ordinal = 0; ordinal < childWriters.length; ordinal++) {
            childWriters[ordinal] = createFieldWriter(structVector.getChildByOrdinal(ordinal));
        }
        return new StructWriter(structVector, childWriters);
    }
    if (valueVector instanceof VarBinaryVector) {
        return new BinaryWriter((VarBinaryVector) valueVector);
    }
    if (valueVector instanceof DateDayVector) {
        return new DateWriter((DateDayVector) valueVector);
    }
    if (valueVector instanceof TimeStampMicroTZVector) {
        return new TimeStampWriter((TimeStampMicroTZVector) valueVector);
    }
    throw new UnsupportedOperationException("Invalid data type");
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) Field(org.apache.arrow.vector.types.pojo.Field) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Aggregations

Field (org.apache.arrow.vector.types.pojo.Field)18 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)10 ArrayList (java.util.ArrayList)6 ListTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping)5 PrimitiveTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping)5 RepeatedTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping)5 StructTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping)5 TypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping)5 UnionTypeMapping (org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping)5 Schema (org.apache.arrow.vector.types.pojo.Schema)4 FieldVector (org.apache.arrow.vector.FieldVector)3 FieldType (org.apache.arrow.vector.types.pojo.FieldType)3 MessageType (org.apache.parquet.schema.MessageType)3 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)2 TField (edu.iu.dsc.tws.common.table.TField)2 Attribute (edu.uci.ics.texera.api.schema.Attribute)2 Schema (edu.uci.ics.texera.api.schema.Schema)2 Float8Vector (org.apache.arrow.vector.Float8Vector)2 IntVector (org.apache.arrow.vector.IntVector)2 GroupType (org.apache.parquet.schema.GroupType)2