Search in sources:

Example 1 with ListVector

Use of org.apache.arrow.vector.complex.ListVector in project flink by apache.

Class ArrowUtils, method createArrowFieldWriterForArray.

/**
 * Selects the {@link ArrowFieldWriter} that writes {@code ArrayData} elements into the given Arrow vector.
 *
 * <p>The writer is chosen from the runtime class of {@code vector}. {@code fieldType} is consulted only
 * where the vector alone is not enough: timestamp precision, the element type of a list, and the field
 * types of a struct. A {@code TimeStampVector} whose Arrow type carries a timezone is not handled by the
 * timestamp branch.
 *
 * @param vector the Arrow vector to write into
 * @param fieldType the logical type describing the data stored in {@code vector}
 * @return a writer bound to {@code vector}
 * @throws UnsupportedOperationException if no branch matches the vector
 */
private static ArrowFieldWriter<ArrayData> createArrowFieldWriterForArray(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forArray((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forArray((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forArray((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forArray((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forArray((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forArray((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forArray((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forArray((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forArray((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forArray(decimals, getPrecision(decimals), decimals.getScale());
    }
    if (vector instanceof DateDayVector) {
        return DateWriter.forArray((DateDayVector) vector);
    }

    // All four time resolutions share one writer, which receives the untyped vector.
    boolean isTimeVector = vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector;
    if (isTimeVector) {
        return TimeWriter.forArray(vector);
    }

    // Only timestamp vectors without a timezone are handled here.
    if (vector instanceof TimeStampVector && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        int precision = fieldType instanceof LocalZonedTimestampType
                ? ((LocalZonedTimestampType) fieldType).getPrecision()
                : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forArray(vector, precision);
    }

    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        LogicalType elementType = ((ArrayType) fieldType).getElementType();
        // Nested arrays recurse on the list's data vector with the element type.
        ArrowFieldWriter<ArrayData> elementWriter = createArrowFieldWriterForArray(list.getDataVector(), elementType);
        return ArrayWriter.forArray(list, elementWriter);
    }

    if (vector instanceof StructVector) {
        RowType rowType = (RowType) fieldType;
        StructVector struct = (StructVector) vector;
        ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[rowType.getFieldCount()];
        // Struct children are written from RowData, hence the row-flavoured factory.
        for (int child = 0; child < childWriters.length; child++) {
            childWriters[child] = createArrowFieldWriterForRow(struct.getVectorById(child), rowType.getTypeAt(child));
        }
        return RowWriter.forArray(struct, childWriters);
    }

    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) ArrowFieldWriter(org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) BigIntVector(org.apache.arrow.vector.BigIntVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Example 2 with ListVector

Use of org.apache.arrow.vector.complex.ListVector in project textdb by TextDB.

Class NltkSentimentOperator, method convertArrowVectorsToResults.

/**
 * Converts every row of the given Arrow {@code VectorSchemaRoot} into a Texera {@code Tuple} and adds it
 * to {@code resultQueue}.
 *
 * <p>For each row, one {@code IField} is built per field vector, chosen by the Arrow type id of the
 * vector. When reading a cell throws an {@code IllegalStateException} whose message contains
 * "Value at index is null", a null-valued field of the matching kind is used instead; type ids with no
 * null mapping here (for example {@code Utf8}) leave the field itself {@code null}.
 *
 * @param schemaRoot the Arrow data to convert
 * @throws DataflowException if a vector has an unsupported type, or if reading a cell fails with an
 *     {@code IllegalStateException} that does not signal a null value
 */
private void convertArrowVectorsToResults(VectorSchemaRoot schemaRoot) {
    List<FieldVector> fieldVectors = schemaRoot.getFieldVectors();
    Schema texeraSchema = convertToTexeraSchema(schemaRoot.getSchema());
    int rowCount = schemaRoot.getRowCount();
    for (int i = 0; i < rowCount; i++) {
        List<IField> texeraFields = new ArrayList<>();
        for (FieldVector vector : fieldVectors) {
            IField texeraField = null;
            try {
                switch (vector.getField().getFieldType().getType().getTypeID()) {
                    case Int:
                        // The integer width is decided on the Python side, so the vector is either an
                        // IntVector or a BigIntVector. Check the class instead of catching ClassCastException.
                        if (vector instanceof IntVector) {
                            texeraField = new IntegerField(((IntVector) vector).get(i));
                        } else {
                            // 64-bit values are narrowed to int.
                            texeraField = new IntegerField((int) ((BigIntVector) vector).get(i));
                        }
                        break;
                    case FloatingPoint:
                        texeraField = new DoubleField((((Float8Vector) vector).get(i)));
                        break;
                    case Utf8:
                        texeraField = new TextField(new String(((VarCharVector) vector).get(i), StandardCharsets.UTF_8));
                        break;
                    case Date:
                        // NOTE(review): DateDayVector presumably stores days since the epoch, while a
                        // java.util.Date constructor taking a long expects milliseconds - confirm which
                        // Date type and DateField constructor are intended here.
                        texeraField = new DateField(new Date(((DateDayVector) vector).get(i)));
                        break;
                    case Struct:
                        // For now, struct is only used for DateTime: child 0 is the day, child 1 the time.
                        DateDayVector subVectorDay = (DateDayVector) ((StructVector) vector).getChildByOrdinal(0);
                        TimeSecVector subVectorTime = (TimeSecVector) ((StructVector) vector).getChildByOrdinal(1);
                        texeraField = new DateTimeField(LocalDateTime.of(LocalDate.ofEpochDay(subVectorDay.get(i)), LocalTime.ofSecondOfDay(subVectorTime.get(i))));
                        break;
                    case List:
                        texeraField = getSpanFromListVector((ListVector) vector, i);
                        break;
                    default:
                        throw (new DataflowException("Unsupported data type " + vector.getField().toString() + " when converting back to Texera table."));
                }
            } catch (IllegalStateException e) {
                // Only the "null cell" signal is recoverable. A missing message cannot be that signal,
                // so it is rethrown with its cause instead of failing with a NullPointerException.
                String message = e.getMessage();
                if (message == null || !message.contains("Value at index is null")) {
                    throw new DataflowException(e);
                }
                switch (vector.getField().getFieldType().getType().getTypeID()) {
                    case Int:
                        texeraField = new IntegerField(null);
                        break;
                    case FloatingPoint:
                        texeraField = new DoubleField(null);
                        break;
                    case Date:
                        texeraField = new DateField((String) null);
                        break;
                    case Struct:
                        texeraField = new DateTimeField((String) null);
                        break;
                    case List:
                        texeraField = new ListField<Span>(null);
                        break;
                    default:
                        // No null representation defined for this type; texeraField stays null.
                        break;
                }
            }
            texeraFields.add(texeraField);
        }
        resultQueue.add(new Tuple(texeraSchema, texeraFields));
    }
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) LocalDate(java.time.LocalDate) ListVector(org.apache.arrow.vector.complex.ListVector) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 3 with ListVector

Use of org.apache.arrow.vector.complex.ListVector in project textdb by TextDB.

Class NltkSentimentOperator, method vectorizeTupleToPython.

/**
 * Writes the fields of one Texera {@code Tuple} into row {@code index} of the given Arrow
 * {@code VectorSchemaRoot}, one vector per attribute, looked up by attribute name.
 *
 * <p>Fields whose value is {@code null} are skipped, so nothing is written to their vector at this row.
 * Attribute types without a case below are ignored.
 *
 * @param tuple the tuple to read values from
 * @param index the row position to write in each vector
 * @param schemaRoot the Arrow container holding one vector per attribute name
 * @throws DataflowException if a non-empty LIST field holds elements that are not {@code Span}
 */
private void vectorizeTupleToPython(Tuple tuple, int index, VectorSchemaRoot schemaRoot) {
    for (Attribute a : tuple.getSchema().getAttributes()) {
        String name = a.getName();
        // Look the value up once; every branch below needs it.
        Object value = tuple.getField(name).getValue();
        // When it is null, skip it.
        if (value == null) {
            continue;
        }
        switch (a.getType()) {
            case INTEGER:
                ((IntVector) schemaRoot.getVector(name)).setSafe(index, (int) value);
                break;
            case DOUBLE:
                ((Float8Vector) schemaRoot.getVector(name)).setSafe(index, (double) value);
                break;
            // BOOLEAN shares the string branch: the value is written as its toString() text.
            case BOOLEAN:
            case TEXT:
            case STRING:
            case _ID_TYPE:
                ((VarCharVector) schemaRoot.getVector(name)).setSafe(index, value.toString().getBytes(StandardCharsets.UTF_8));
                break;
            case DATE:
                ((DateDayVector) schemaRoot.getVector(name)).setSafe(index, (int) ((LocalDate) value).toEpochDay());
                break;
            case DATETIME:
                // The value is non-null here (nulls were skipped above), so the struct slot is always defined.
                StructVector dateTimeStructs = ((StructVector) schemaRoot.getVector(name));
                dateTimeStructs.setIndexDefined(index);
                DateDayVector subVectorDay = (DateDayVector) dateTimeStructs.getVectorById(0);
                TimeSecVector subVectorTime = (TimeSecVector) dateTimeStructs.getVectorById(1);
                LocalDateTime dateTime = (LocalDateTime) value;
                subVectorDay.setSafe(index, (int) dateTime.toLocalDate().toEpochDay());
                subVectorTime.setSafe(index, dateTime.toLocalTime().toSecondOfDay());
                break;
            case LIST:
                // For now only supporting span. The element type is judged from the first element; an
                // empty list has none, so the check is skipped instead of failing on get(0).
                // NOTE(review): assumes convertListOfSpans accepts an empty list - confirm.
                ImmutableList<?> elements = (ImmutableList<?>) value;
                if (!elements.isEmpty() && elements.get(0).getClass() != Span.class) {
                    throw (new DataflowException("Unsupported Element Type for List Field!"));
                }
                ListVector listVector = (ListVector) schemaRoot.getVector(name);
                @SuppressWarnings("unchecked") // only the first element's class is verified above
                ImmutableList<Span> spansList = (ImmutableList<Span>) elements;
                convertListOfSpans(spansList, listVector, index, name);
                break;
            default:
                break;
        }
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) StructVector(org.apache.arrow.vector.complex.StructVector) Attribute(edu.uci.ics.texera.api.schema.Attribute) ImmutableList(com.google.common.collect.ImmutableList) LocalDate(java.time.LocalDate) Span(edu.uci.ics.texera.api.span.Span) ListVector(org.apache.arrow.vector.complex.ListVector) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 4 with ListVector

Use of org.apache.arrow.vector.complex.ListVector in project flink by apache.

Class ArrayWriter, method doWrite.

/**
 * Writes the array found at {@code ordinal} of {@code in} as one list value of the underlying
 * {@link ListVector}. Nothing is written when the value at {@code ordinal} is null.
 *
 * @param in the container the array is read from
 * @param ordinal the position of the array inside {@code in}
 */
@Override
public void doWrite(T in, int ordinal) {
    if (isNullAt(in, ordinal)) {
        return;
    }

    ListVector target = (ListVector) getValueVector();
    target.startNewValue(getCount());

    // Each element goes through the element writer, between the start/end markers of this list value.
    ArrayData elements = readArray(in, ordinal);
    for (int pos = 0; pos < elements.size(); pos++) {
        elementWriter.write(elements, pos);
    }

    target.endValue(getCount(), elements.size());
}
Also used : ListVector(org.apache.arrow.vector.complex.ListVector) ArrayData(org.apache.flink.table.data.ArrayData)

Example 5 with ListVector

Use of org.apache.arrow.vector.complex.ListVector in project flink by apache.

Class ArrowUtils, method createColumnVector.

/**
 * Wraps the given Arrow vector in the matching {@link ColumnVector} implementation.
 *
 * <p>The wrapper is chosen from the runtime class of {@code vector}. {@code fieldType} is used only to
 * resolve the element type of a list and the field types of a struct, both of which are wrapped
 * recursively. A {@code TimeStampVector} whose Arrow type carries a timezone is not handled by the
 * timestamp branch.
 *
 * @param vector the Arrow vector to wrap
 * @param fieldType the logical type describing the data stored in {@code vector}
 * @return a column vector backed by {@code vector}
 * @throws UnsupportedOperationException if no branch matches the vector
 */
public static ColumnVector createColumnVector(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return new ArrowTinyIntColumnVector((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return new ArrowSmallIntColumnVector((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return new ArrowIntColumnVector((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return new ArrowBigIntColumnVector((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return new ArrowBooleanColumnVector((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return new ArrowFloatColumnVector((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return new ArrowDoubleColumnVector((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return new ArrowVarCharColumnVector((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return new ArrowVarBinaryColumnVector((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        return new ArrowDecimalColumnVector((DecimalVector) vector);
    }
    if (vector instanceof DateDayVector) {
        return new ArrowDateColumnVector((DateDayVector) vector);
    }

    // All four time resolutions share one wrapper, which receives the untyped vector.
    boolean isTimeVector = vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector;
    if (isTimeVector) {
        return new ArrowTimeColumnVector(vector);
    }

    // Only timestamp vectors without a timezone are handled here.
    if (vector instanceof TimeStampVector && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        return new ArrowTimestampColumnVector(vector);
    }

    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        ColumnVector elements = createColumnVector(list.getDataVector(), ((ArrayType) fieldType).getElementType());
        return new ArrowArrayColumnVector(list, elements);
    }

    if (vector instanceof StructVector) {
        StructVector struct = (StructVector) vector;
        ColumnVector[] children = new ColumnVector[struct.size()];
        // fieldType is cast per child so it is only touched when the struct actually has children.
        for (int child = 0; child < children.length; ++child) {
            children[child] = createColumnVector(struct.getVectorById(child), ((RowType) fieldType).getTypeAt(child));
        }
        return new ArrowRowColumnVector(struct, children);
    }

    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) ArrowArrayColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector) ArrowTinyIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector) ArrowIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) RowType(org.apache.flink.table.types.logical.RowType) ArrowBooleanColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) ArrowDoubleColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector) ArrowDateColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector) ArrowSmallIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector) DateDayVector(org.apache.arrow.vector.DateDayVector) ArrowVarBinaryColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarBinaryColumnVector) ArrowDecimalColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector) ArrowFloatColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector) DecimalVector(org.apache.arrow.vector.DecimalVector) ArrowTimeColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) 
ArrowVarCharColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarCharColumnVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ArrowBigIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector) ArrowVarBinaryColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarBinaryColumnVector) ArrowIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector) ArrowDoubleColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector) ArrowTinyIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector) ArrowDateColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector) ArrowBooleanColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector) ArrowDecimalColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector) ArrowVarCharColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarCharColumnVector) ArrowTimestampColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector) ColumnVector(org.apache.flink.table.data.columnar.vector.ColumnVector) ArrowRowColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector) ArrowSmallIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector) ArrowFloatColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector) ArrowArrayColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector) ArrowTimeColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) ArrowBigIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector) ArrowTimestampColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector) 
TinyIntVector(org.apache.arrow.vector.TinyIntVector) ArrowRowColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector)

Aggregations

ListVector (org.apache.arrow.vector.complex.ListVector)8 BitVector (org.apache.arrow.vector.BitVector)5 Float8Vector (org.apache.arrow.vector.Float8Vector)5 IntVector (org.apache.arrow.vector.IntVector)5 VarCharVector (org.apache.arrow.vector.VarCharVector)5 StructVector (org.apache.arrow.vector.complex.StructVector)5 BigIntVector (org.apache.arrow.vector.BigIntVector)4 DateDayVector (org.apache.arrow.vector.DateDayVector)4 DecimalVector (org.apache.arrow.vector.DecimalVector)4 Float4Vector (org.apache.arrow.vector.Float4Vector)4 SmallIntVector (org.apache.arrow.vector.SmallIntVector)4 TinyIntVector (org.apache.arrow.vector.TinyIntVector)4 VarBinaryVector (org.apache.arrow.vector.VarBinaryVector)4 TimeMicroVector (org.apache.arrow.vector.TimeMicroVector)3 TimeMilliVector (org.apache.arrow.vector.TimeMilliVector)3 TimeNanoVector (org.apache.arrow.vector.TimeNanoVector)3 TimeSecVector (org.apache.arrow.vector.TimeSecVector)3 TimeStampVector (org.apache.arrow.vector.TimeStampVector)3 RowType (org.apache.flink.table.types.logical.RowType)3 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)2