Search in sources :

Example 1 with ArrowType

use of org.apache.arrow.vector.types.pojo.ArrowType in project textdb by TextDB.

From the class NltkSentimentOperator, method convertToTexeraSchema.

/**
 * Builds a Texera {@link Schema} from an Arrow schema by converting every Arrow field,
 * in order, into an {@link Attribute} that keeps the field's name.
 *
 * @param arrowSchema the Arrow schema whose fields are converted
 * @return a Texera schema with one attribute per Arrow field
 * @throws DataflowException if a field's Arrow type has no Texera counterpart
 */
private Schema convertToTexeraSchema(org.apache.arrow.vector.types.pojo.Schema arrowSchema) {
    List<Attribute> converted = new ArrayList<>();
    for (Field arrowField : arrowSchema.getFields()) {
        String name = arrowField.getName();
        AttributeType type = toTexeraAttributeType(arrowField.getFieldType().getType());
        converted.add(new Attribute(name, type));
    }
    return new Schema(converted);
}

/**
 * Maps a single Arrow type onto the Texera attribute type used to represent it.
 *
 * @param arrowType the Arrow type of a field
 * @return the matching Texera attribute type
 * @throws DataflowException if the Arrow type ID is not one of the handled cases
 */
private AttributeType toTexeraAttributeType(ArrowType arrowType) {
    switch (arrowType.getTypeID()) {
        case Int:
            return INTEGER;
        case FloatingPoint:
            return DOUBLE;
        case Bool:
            return BOOLEAN;
        case Utf8:
        case Null:
            // Null-typed columns are surfaced as text, same as Utf8.
            return TEXT;
        case Date:
            return DATE;
        case Struct:
            // For now the only Struct handled is the one representing a DateTime.
            return DATETIME;
        case List:
            return LIST;
        default:
            throw new DataflowException("Unsupported data type " + arrowType.getTypeID() + " when converting back to Texera table.");
    }
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 2 with ArrowType

use of org.apache.arrow.vector.types.pojo.ArrowType in project flink by apache.

From the class ArrowUtilsTest, method init.

/**
 * Populates the shared test fixture once for the whole test class.
 *
 * <p>Each {@code testFields} entry is a tuple of: field name, Flink logical type, the
 * Arrow type paired with it, and two classes (a row writer class and a column vector
 * class, going by their names). After the list is filled, {@code rowType} is built from
 * the name/logical-type pair of every entry, and {@code allocator} is created as a child
 * of the root allocator.
 */
@BeforeClass
public static void init() {
    testFields = new ArrayList<>();

    // Integer, boolean and floating-point types.
    testFields.add(Tuple5.of("f1", new TinyIntType(), new ArrowType.Int(8, true), TinyIntWriter.TinyIntWriterForRow.class, ArrowTinyIntColumnVector.class));
    testFields.add(Tuple5.of("f2", new SmallIntType(), new ArrowType.Int(8 * 2, true), SmallIntWriter.SmallIntWriterForRow.class, ArrowSmallIntColumnVector.class));
    testFields.add(Tuple5.of("f3", new IntType(), new ArrowType.Int(8 * 4, true), IntWriter.IntWriterForRow.class, ArrowIntColumnVector.class));
    testFields.add(Tuple5.of("f4", new BigIntType(), new ArrowType.Int(8 * 8, true), BigIntWriter.BigIntWriterForRow.class, ArrowBigIntColumnVector.class));
    testFields.add(Tuple5.of("f5", new BooleanType(), new ArrowType.Bool(), BooleanWriter.BooleanWriterForRow.class, ArrowBooleanColumnVector.class));
    testFields.add(Tuple5.of("f6", new FloatType(), new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), FloatWriter.FloatWriterForRow.class, ArrowFloatColumnVector.class));
    testFields.add(Tuple5.of("f7", new DoubleType(), new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), DoubleWriter.DoubleWriterForRow.class, ArrowDoubleColumnVector.class));

    // Character, binary, decimal and date types.
    testFields.add(Tuple5.of("f8", new VarCharType(), ArrowType.Utf8.INSTANCE, VarCharWriter.VarCharWriterForRow.class, ArrowVarCharColumnVector.class));
    testFields.add(Tuple5.of("f9", new VarBinaryType(), ArrowType.Binary.INSTANCE, VarBinaryWriter.VarBinaryWriterForRow.class, ArrowVarBinaryColumnVector.class));
    testFields.add(Tuple5.of("f10", new DecimalType(10, 3), new ArrowType.Decimal(10, 3), DecimalWriter.DecimalWriterForRow.class, ArrowDecimalColumnVector.class));
    testFields.add(Tuple5.of("f11", new DateType(), new ArrowType.Date(DateUnit.DAY), DateWriter.DateWriterForRow.class, ArrowDateColumnVector.class));

    // Time types at precisions 0/2/4/8. Note: the field names skip "f12".
    testFields.add(Tuple5.of("f13", new TimeType(0), new ArrowType.Time(TimeUnit.SECOND, 32), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f14", new TimeType(2), new ArrowType.Time(TimeUnit.MILLISECOND, 32), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f15", new TimeType(4), new ArrowType.Time(TimeUnit.MICROSECOND, 64), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f16", new TimeType(8), new ArrowType.Time(TimeUnit.NANOSECOND, 64), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));

    // Local-zoned timestamps at precisions 0/2/4/8.
    testFields.add(Tuple5.of("f17", new LocalZonedTimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f18", new LocalZonedTimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f19", new LocalZonedTimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f20", new LocalZonedTimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));

    // Plain timestamps at precisions 0/2/4/8.
    testFields.add(Tuple5.of("f21", new TimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f22", new TimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f23", new TimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f24", new TimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));

    // Array of strings.
    testFields.add(Tuple5.of("f25", new ArrayType(new VarCharType()), ArrowType.List.INSTANCE, ArrayWriter.ArrayWriterForRow.class, ArrowArrayColumnVector.class));

    // Row type with primitive, array, timestamp and nested-row members.
    RowType nestedRowType = new RowType(Arrays.asList(
            new RowType.RowField("e1", new IntType()),
            new RowType.RowField("e2", new VarCharType())));
    RowType rowFieldType = new RowType(Arrays.asList(
            new RowType.RowField("a", new IntType()),
            new RowType.RowField("b", new VarCharType()),
            new RowType.RowField("c", new ArrayType(new VarCharType())),
            new RowType.RowField("d", new TimestampType(2)),
            new RowType.RowField("e", nestedRowType)));
    testFields.add(Tuple5.of("f26", rowFieldType, ArrowType.Struct.INSTANCE, RowWriter.RowWriterForRow.class, ArrowRowColumnVector.class));

    // The overall row type has one field per entry, in the same order as testFields.
    List<RowType.RowField> rowFields = new ArrayList<>();
    testFields.forEach(entry -> rowFields.add(new RowType.RowField(entry.f0, entry.f1)));
    rowType = new RowType(rowFields);

    allocator = ArrowUtils.getRootAllocator().newChildAllocator("stdout", 0, Long.MAX_VALUE);
}
Also used : ArrowArrayColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector) ArrayList(java.util.ArrayList) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) FloatType(org.apache.flink.table.types.logical.FloatType) ArrayType(org.apache.flink.table.types.logical.ArrayType) DateType(org.apache.flink.table.types.logical.DateType) ArrowTimeColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType) BeforeClass(org.junit.BeforeClass) ArrowTimestampColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector) VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) ArrowTinyIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector) ArrowIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector) BigIntType(org.apache.flink.table.types.logical.BigIntType) RowType(org.apache.flink.table.types.logical.RowType) ArrowBooleanColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) TimeType(org.apache.flink.table.types.logical.TimeType) ArrowDoubleColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector) ArrowDateColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector) ArrowSmallIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) TimestampType(org.apache.flink.table.types.logical.TimestampType) 
ArrowVarBinaryColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarBinaryColumnVector) VarCharType(org.apache.flink.table.types.logical.VarCharType) ArrowDecimalColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector) ArrowFloatColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector) BooleanType(org.apache.flink.table.types.logical.BooleanType) ArrowVarCharColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarCharColumnVector) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) DoubleType(org.apache.flink.table.types.logical.DoubleType) ArrowBigIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector) ArrowRowColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector) BeforeClass(org.junit.BeforeClass)

Aggregations

ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)2 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)1 Attribute (edu.uci.ics.texera.api.schema.Attribute)1 AttributeType (edu.uci.ics.texera.api.schema.AttributeType)1 Schema (edu.uci.ics.texera.api.schema.Schema)1 ArrayList (java.util.ArrayList)1 Field (org.apache.arrow.vector.types.pojo.Field)1 ArrowArrayColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector)1 ArrowBigIntColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector)1 ArrowBooleanColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector)1 ArrowDateColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector)1 ArrowDecimalColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector)1 ArrowDoubleColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector)1 ArrowFloatColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector)1 ArrowIntColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector)1 ArrowRowColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector)1 ArrowSmallIntColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector)1 ArrowTimeColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector)1 ArrowTimestampColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector)1 ArrowTinyIntColumnVector (org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector)1