Search in sources :

Example 1 with StructVector

use of org.apache.arrow.vector.complex.StructVector in project flink by apache.

From the class ArrowUtils, method createArrowFieldWriterForArray.

/**
 * Builds the {@link ArrowFieldWriter} that writes values taken from an {@link ArrayData}
 * into the given Arrow vector.
 *
 * <p>The writer is selected from the runtime class of {@code vector}. List vectors recurse
 * into their data vector using the element type of {@code fieldType}; struct vectors build
 * one row-level writer per field of {@code fieldType}. Timestamp vectors are only handled
 * when their Arrow type carries no time zone; any other vector ends in the unsupported case.
 *
 * @param vector the Arrow vector the returned writer writes into
 * @param fieldType the logical type describing {@code vector}; consulted for timestamp
 *     precision, array element type and row field types
 * @return a writer bound to {@code vector}
 * @throws UnsupportedOperationException if no writer exists for the vector
 */
private static ArrowFieldWriter<ArrayData> createArrowFieldWriterForArray(ValueVector vector, LogicalType fieldType) {
    // Integral and boolean vectors.
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forArray((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forArray((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forArray((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forArray((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forArray((BitVector) vector);
    }

    // Floating point vectors.
    if (vector instanceof Float4Vector) {
        return FloatWriter.forArray((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forArray((Float8Vector) vector);
    }

    // Variable width vectors.
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forArray((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forArray((VarBinaryVector) vector);
    }

    if (vector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forArray(decimals, getPrecision(decimals), decimals.getScale());
    }

    // Date and time vectors.
    if (vector instanceof DateDayVector) {
        return DateWriter.forArray((DateDayVector) vector);
    }
    if (vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector) {
        // All four time resolutions share one writer, which receives the untyped vector.
        return TimeWriter.forArray(vector);
    }
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        // The precision comes from the logical type, which is one of the two timestamp kinds.
        int timestampPrecision = fieldType instanceof LocalZonedTimestampType
                ? ((LocalZonedTimestampType) fieldType).getPrecision()
                : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forArray(vector, timestampPrecision);
    }

    // Nested vectors.
    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        LogicalType elementLogicalType = ((ArrayType) fieldType).getElementType();
        ArrowFieldWriter<ArrayData> elementWriter =
                createArrowFieldWriterForArray(list.getDataVector(), elementLogicalType);
        return ArrayWriter.forArray(list, elementWriter);
    }
    if (vector instanceof StructVector) {
        StructVector struct = (StructVector) vector;
        RowType rowLogicalType = (RowType) fieldType;
        ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[rowLogicalType.getFieldCount()];
        for (int pos = 0; pos < childWriters.length; pos++) {
            childWriters[pos] = createArrowFieldWriterForRow(struct.getVectorById(pos), rowLogicalType.getTypeAt(pos));
        }
        return RowWriter.forArray(struct, childWriters);
    }

    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) ArrowFieldWriter(org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) BigIntVector(org.apache.arrow.vector.BigIntVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Example 2 with StructVector

use of org.apache.arrow.vector.complex.StructVector in project flink by apache.

From the class RowWriter, method doWrite.

/**
 * Writes the row located at {@code ordinal} of {@code in} into the underlying struct vector.
 *
 * <p>The struct slot at the current count is flagged as defined or null first. Every field
 * writer is then invoked in both cases; for a null input the writers receive {@code nullRow}
 * instead of a row read from {@code in}.
 *
 * @param in the container the row is read from
 * @param ordinal the position of the row inside {@code in}
 */
@Override
public void doWrite(T in, int ordinal) {
    final RowData current;
    if (!isNullAt(in, ordinal)) {
        current = readRow(in, ordinal);
        StructVector struct = (StructVector) getValueVector();
        struct.setIndexDefined(getCount());
    } else {
        current = nullRow;
        StructVector struct = (StructVector) getValueVector();
        struct.setNull(getCount());
    }

    // Each child writer reads the field whose position matches its own slot in the array.
    int field = 0;
    while (field < fieldsWriters.length) {
        fieldsWriters[field].write(current, field);
        field++;
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) StructVector(org.apache.arrow.vector.complex.StructVector)

Example 3 with StructVector

use of org.apache.arrow.vector.complex.StructVector in project textdb by TextDB.

From the class NltkSentimentOperator, method convertListOfSpans.

// For now we're only allowing List<Span>. This can (and should) be generalized in the future.
/**
 * Writes one list of spans into {@code listVector} as the list value at {@code index}.
 *
 * <p>Each span becomes one struct in the list's data vector, with children addressed by id:
 * 0 attribute name, 1 start, 2 end, 3 key, 4 value, 5 token offset. The position of the
 * next struct to write is tracked per attribute name in {@code innerIndexMap}; it restarts
 * at 0 whenever {@code index} is 0 and is stored back after the list has been written.
 *
 * @param spansList the spans to write; a {@code null} element marks its struct slot as null
 * @param listVector the Arrow list vector receiving the spans
 * @param index the position of this list inside {@code listVector}
 * @param name the attribute name used as key into {@code innerIndexMap}
 */
private void convertListOfSpans(ImmutableList<Span> spansList, ListVector listVector, int index, String name) {
    if (index == 0) {
        // A plain put covers both the "already present" and the "absent" case.
        innerIndexMap.put(name, 0);
    }
    // The caller skips null fields, so the first list of an attribute can arrive with a
    // non-zero index before any entry exists. Fall back to 0 instead of failing on unboxing.
    int innerIndex = innerIndexMap.getOrDefault(name, 0);
    int size = spansList.size();
    StructVector subElementsVector = (StructVector) listVector.getDataVector();
    listVector.startNewValue(index);
    VarCharVector attributeNameVector = (VarCharVector) subElementsVector.getVectorById(0);
    IntVector startVector = (IntVector) subElementsVector.getVectorById(1);
    IntVector endVector = (IntVector) subElementsVector.getVectorById(2);
    VarCharVector keyVector = (VarCharVector) subElementsVector.getVectorById(3);
    VarCharVector valueVector = (VarCharVector) subElementsVector.getVectorById(4);
    IntVector tokenOffsetVector = (IntVector) subElementsVector.getVectorById(5);
    for (Span span : spansList) {
        if (span == null) {
            subElementsVector.setNull(innerIndex);
        } else {
            subElementsVector.setIndexDefined(innerIndex);
            // String children are only written when present; an absent string leaves the
            // child slot unset.
            if (span.getAttributeName() != null) {
                attributeNameVector.setSafe(innerIndex, span.getAttributeName().getBytes(StandardCharsets.UTF_8));
            }
            startVector.setSafe(innerIndex, span.getStart());
            endVector.setSafe(innerIndex, span.getEnd());
            if (span.getKey() != null) {
                keyVector.setSafe(innerIndex, span.getKey().getBytes(StandardCharsets.UTF_8));
            }
            if (span.getValue() != null) {
                valueVector.setSafe(innerIndex, span.getValue().getBytes(StandardCharsets.UTF_8));
            }
            tokenOffsetVector.setSafe(innerIndex, span.getTokenOffset());
        }
        innerIndex++;
    }
    // put rather than replace: the entry may not exist yet when the default above was used.
    innerIndexMap.put(name, innerIndex);
    listVector.endValue(index, size);
}
Also used : StructVector(org.apache.arrow.vector.complex.StructVector) Span(edu.uci.ics.texera.api.span.Span)

Example 4 with StructVector

use of org.apache.arrow.vector.complex.StructVector in project textdb by TextDB.

From the class NltkSentimentOperator, method vectorizeTupleToPython.

/**
 * Copies the fields of one tuple into row {@code index} of the Arrow vectors in
 * {@code schemaRoot}, looking up each vector by attribute name.
 *
 * <p>Fields whose value is {@code null} are skipped, so nothing is written to the
 * corresponding vector for that row. Attribute types without a case below are ignored.
 *
 * @param tuple the tuple to vectorize
 * @param index the row position to write in every vector
 * @param schemaRoot the Arrow root holding one vector per attribute name
 * @throws DataflowException if a LIST field holds elements that are not {@link Span}s
 */
private void vectorizeTupleToPython(Tuple tuple, int index, VectorSchemaRoot schemaRoot) {
    for (Attribute a : tuple.getSchema().getAttributes()) {
        String name = a.getName();
        // Read the value once; every case below works on this same object.
        Object value = tuple.getField(name).getValue();
        // When it is null, skip it.
        if (value == null) {
            continue;
        }
        switch (a.getType()) {
            case INTEGER:
                ((IntVector) schemaRoot.getVector(name)).setSafe(index, (int) value);
                break;
            case DOUBLE:
                ((Float8Vector) schemaRoot.getVector(name)).setSafe(index, (double) value);
                break;
            case BOOLEAN:
                // NOTE(review): falls through on purpose as far as the code shows (the break
                // was commented out), so booleans are written as their string form.
            case TEXT:
            case STRING:
            case _ID_TYPE:
                ((VarCharVector) schemaRoot.getVector(name)).setSafe(index, value.toString().getBytes(StandardCharsets.UTF_8));
                break;
            case DATE:
                ((DateDayVector) schemaRoot.getVector(name)).setSafe(index, (int) ((LocalDate) value).toEpochDay());
                break;
            case DATETIME: {
                // Stored as a struct: child 0 holds the epoch day, child 1 the second of day.
                // The value is known to be non-null here, so the slot is always defined.
                StructVector dateTimeStructs = (StructVector) schemaRoot.getVector(name);
                dateTimeStructs.setIndexDefined(index);
                DateDayVector subVectorDay = (DateDayVector) dateTimeStructs.getVectorById(0);
                TimeSecVector subVectorTime = (TimeSecVector) dateTimeStructs.getVectorById(1);
                LocalDateTime dateTime = (LocalDateTime) value;
                subVectorDay.setSafe(index, (int) dateTime.toLocalDate().toEpochDay());
                subVectorTime.setSafe(index, dateTime.toLocalTime().toSecondOfDay());
                break;
            }
            case LIST: {
                // For now only supporting span.
                ImmutableList<?> elements = (ImmutableList<?>) value;
                // Only the first element is inspected; an empty list has nothing to inspect
                // and is written as an empty list instead of failing on get(0).
                if (!elements.isEmpty() && elements.get(0).getClass() != Span.class) {
                    throw new DataflowException("Unsupported Element Type for List Field!");
                }
                ListVector listVector = (ListVector) schemaRoot.getVector(name);
                @SuppressWarnings("unchecked")
                ImmutableList<Span> spansList = (ImmutableList<Span>) elements;
                convertListOfSpans(spansList, listVector, index, name);
                break;
            }
            default:
                break;
        }
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) StructVector(org.apache.arrow.vector.complex.StructVector) Attribute(edu.uci.ics.texera.api.schema.Attribute) ImmutableList(com.google.common.collect.ImmutableList) LocalDate(java.time.LocalDate) Span(edu.uci.ics.texera.api.span.Span) ListVector(org.apache.arrow.vector.complex.ListVector) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 5 with StructVector

use of org.apache.arrow.vector.complex.StructVector in project flink by apache.

From the class ArrowUtils, method createColumnVector.

/**
 * Wraps an Arrow vector in the matching {@link ColumnVector} implementation.
 *
 * <p>The wrapper is selected from the runtime class of {@code vector}. List vectors wrap
 * their data vector recursively using the element type of {@code fieldType}; struct vectors
 * wrap each child vector using the corresponding field type of {@code fieldType}. Timestamp
 * vectors are only handled when their Arrow type carries no time zone; any other vector
 * ends in the unsupported case.
 *
 * @param vector the Arrow vector to wrap
 * @param fieldType the logical type describing {@code vector}; only consulted for nested
 *     types and for the error message
 * @return a column vector view over {@code vector}
 * @throws UnsupportedOperationException if no wrapper exists for the vector
 */
public static ColumnVector createColumnVector(ValueVector vector, LogicalType fieldType) {
    // Integral and boolean vectors.
    if (vector instanceof TinyIntVector) {
        return new ArrowTinyIntColumnVector((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return new ArrowSmallIntColumnVector((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return new ArrowIntColumnVector((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return new ArrowBigIntColumnVector((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return new ArrowBooleanColumnVector((BitVector) vector);
    }

    // Floating point vectors.
    if (vector instanceof Float4Vector) {
        return new ArrowFloatColumnVector((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return new ArrowDoubleColumnVector((Float8Vector) vector);
    }

    // Variable width vectors.
    if (vector instanceof VarCharVector) {
        return new ArrowVarCharColumnVector((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return new ArrowVarBinaryColumnVector((VarBinaryVector) vector);
    }

    if (vector instanceof DecimalVector) {
        return new ArrowDecimalColumnVector((DecimalVector) vector);
    }

    // Date and time vectors.
    if (vector instanceof DateDayVector) {
        return new ArrowDateColumnVector((DateDayVector) vector);
    }
    if (vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector) {
        // All four time resolutions share one wrapper, which receives the untyped vector.
        return new ArrowTimeColumnVector(vector);
    }
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        return new ArrowTimestampColumnVector(vector);
    }

    // Nested vectors.
    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        ColumnVector elementColumn =
                createColumnVector(list.getDataVector(), ((ArrayType) fieldType).getElementType());
        return new ArrowArrayColumnVector(list, elementColumn);
    }
    if (vector instanceof StructVector) {
        StructVector struct = (StructVector) vector;
        ColumnVector[] children = new ColumnVector[struct.size()];
        int pos = 0;
        while (pos < children.length) {
            // The logical type is cast per child, so a struct without children never touches it.
            children[pos] = createColumnVector(struct.getVectorById(pos), ((RowType) fieldType).getTypeAt(pos));
            ++pos;
        }
        return new ArrowRowColumnVector(struct, children);
    }

    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) ArrowArrayColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector) ArrowTinyIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector) ArrowIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) RowType(org.apache.flink.table.types.logical.RowType) ArrowBooleanColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) ArrowDoubleColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector) ArrowDateColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector) ArrowSmallIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector) DateDayVector(org.apache.arrow.vector.DateDayVector) ArrowVarBinaryColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarBinaryColumnVector) ArrowDecimalColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector) ArrowFloatColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector) DecimalVector(org.apache.arrow.vector.DecimalVector) ArrowTimeColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) 
ArrowVarCharColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarCharColumnVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ArrowBigIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector) ArrowVarBinaryColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarBinaryColumnVector) ArrowIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector) ArrowDoubleColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector) ArrowTinyIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector) ArrowDateColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector) ArrowBooleanColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector) ArrowDecimalColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector) ArrowVarCharColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarCharColumnVector) ArrowTimestampColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector) ColumnVector(org.apache.flink.table.data.columnar.vector.ColumnVector) ArrowRowColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector) ArrowSmallIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector) ArrowFloatColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector) ArrowArrayColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector) ArrowTimeColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) ArrowBigIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector) ArrowTimestampColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector) 
TinyIntVector(org.apache.arrow.vector.TinyIntVector) ArrowRowColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector)

Aggregations

StructVector (org.apache.arrow.vector.complex.StructVector)6 ListVector (org.apache.arrow.vector.complex.ListVector)4 BigIntVector (org.apache.arrow.vector.BigIntVector)3 BitVector (org.apache.arrow.vector.BitVector)3 DateDayVector (org.apache.arrow.vector.DateDayVector)3 DecimalVector (org.apache.arrow.vector.DecimalVector)3 Float4Vector (org.apache.arrow.vector.Float4Vector)3 Float8Vector (org.apache.arrow.vector.Float8Vector)3 IntVector (org.apache.arrow.vector.IntVector)3 SmallIntVector (org.apache.arrow.vector.SmallIntVector)3 TimeMicroVector (org.apache.arrow.vector.TimeMicroVector)3 TimeMilliVector (org.apache.arrow.vector.TimeMilliVector)3 TimeNanoVector (org.apache.arrow.vector.TimeNanoVector)3 TimeSecVector (org.apache.arrow.vector.TimeSecVector)3 TimeStampVector (org.apache.arrow.vector.TimeStampVector)3 TinyIntVector (org.apache.arrow.vector.TinyIntVector)3 VarBinaryVector (org.apache.arrow.vector.VarBinaryVector)3 VarCharVector (org.apache.arrow.vector.VarCharVector)3 RowType (org.apache.flink.table.types.logical.RowType)3 Span (edu.uci.ics.texera.api.span.Span)2