use of org.apache.arrow.vector.complex.ListVector in project flink by apache.
the class ArrowUtils method createArrowFieldWriterForArray.
/**
 * Selects the {@link ArrowFieldWriter} that copies elements of an {@link ArrayData} into the
 * given Arrow vector, dispatching on the concrete vector class.
 *
 * <p>List vectors recurse into this method for their data vector; struct vectors build one
 * row-based writer per child via {@code createArrowFieldWriterForRow}. Timestamp vectors are
 * only handled when their Arrow type carries no timezone.
 *
 * @param vector the Arrow vector the returned writer will write into
 * @param fieldType the logical type describing {@code vector}; consulted for timestamp
 *     precision, array element type and row field types
 * @return a writer bound to {@code vector}
 * @throws UnsupportedOperationException if no writer exists for the vector class
 */
private static ArrowFieldWriter<ArrayData> createArrowFieldWriterForArray(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forArray((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forArray((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forArray((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forArray((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forArray((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forArray((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forArray((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forArray((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forArray((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forArray(decimals, getPrecision(decimals), decimals.getScale());
    }
    if (vector instanceof DateDayVector) {
        return DateWriter.forArray((DateDayVector) vector);
    }
    if (vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector) {
        return TimeWriter.forArray(vector);
    }
    // Only timestamps without a timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        int timestampPrecision = fieldType instanceof LocalZonedTimestampType
                ? ((LocalZonedTimestampType) fieldType).getPrecision()
                : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forArray(vector, timestampPrecision);
    }
    if (vector instanceof ListVector) {
        ListVector nestedList = (ListVector) vector;
        LogicalType nestedElementType = ((ArrayType) fieldType).getElementType();
        ArrowFieldWriter<ArrayData> nestedWriter =
                createArrowFieldWriterForArray(nestedList.getDataVector(), nestedElementType);
        return ArrayWriter.forArray(nestedList, nestedWriter);
    }
    if (vector instanceof StructVector) {
        StructVector struct = (StructVector) vector;
        RowType structType = (RowType) fieldType;
        ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[structType.getFieldCount()];
        for (int child = 0; child < childWriters.length; child++) {
            childWriters[child] =
                    createArrowFieldWriterForRow(struct.getVectorById(child), structType.getTypeAt(child));
        }
        return RowWriter.forArray(struct, childWriters);
    }
    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
use of org.apache.arrow.vector.complex.ListVector in project textdb by TextDB.
the class NltkSentimentOperator method convertArrowVectorsToResults.
/**
 * Converts every row of the given Arrow batch into a Texera {@link Tuple} and adds it to
 * {@code resultQueue}.
 *
 * <p>Each column value is mapped to an {@link IField} according to the column's Arrow type ID.
 * When reading a value raises an {@link IllegalStateException} whose message contains
 * {@code "Value at index is null"}, the value is treated as null: Int, FloatingPoint, Date,
 * Struct and List columns get a field wrapping {@code null}; for any other column type the
 * field reference itself stays {@code null}.
 *
 * @param schemaRoot the Arrow batch to convert
 * @throws DataflowException if a column has an unsupported Arrow type, or if reading a value
 *     fails with an {@link IllegalStateException} that does not indicate a null value
 */
private void convertArrowVectorsToResults(VectorSchemaRoot schemaRoot) {
    List<FieldVector> fieldVectors = schemaRoot.getFieldVectors();
    Schema texeraSchema = convertToTexeraSchema(schemaRoot.getSchema());
    for (int i = 0; i < schemaRoot.getRowCount(); i++) {
        List<IField> texeraFields = new ArrayList<>();
        for (FieldVector vector : fieldVectors) {
            IField texeraField = null;
            try {
                switch (vector.getField().getFieldType().getType().getTypeID()) {
                    case Int:
                        // The producer (Python) may send either a 32-bit or a 64-bit column.
                        // Check the class up front instead of catching ClassCastException.
                        if (vector instanceof IntVector) {
                            texeraField = new IntegerField(((IntVector) vector).get(i));
                        } else {
                            texeraField = new IntegerField((int) ((BigIntVector) vector).get(i));
                        }
                        break;
                    case FloatingPoint:
                        texeraField = new DoubleField((((Float8Vector) vector).get(i)));
                        break;
                    case Utf8:
                        texeraField = new TextField(new String(((VarCharVector) vector).get(i), StandardCharsets.UTF_8));
                        break;
                    case Date:
                        // NOTE(review): the int read from DateDayVector is passed straight to
                        // new Date(...); if Date is java.util.Date that argument is interpreted
                        // as milliseconds - confirm the intended unit.
                        texeraField = new DateField(new Date(((DateDayVector) vector).get(i)));
                        break;
                    case Struct:
                        // For now, struct is only for DateTime: child 0 is the day, child 1 the
                        // second of day.
                        DateDayVector subVectorDay = (DateDayVector) ((StructVector) vector).getChildByOrdinal(0);
                        TimeSecVector subVectorTime = (TimeSecVector) ((StructVector) vector).getChildByOrdinal(1);
                        texeraField = new DateTimeField(LocalDateTime.of(LocalDate.ofEpochDay(subVectorDay.get(i)), LocalTime.ofSecondOfDay(subVectorTime.get(i))));
                        break;
                    case List:
                        texeraField = getSpanFromListVector((ListVector) vector, i);
                        break;
                    default:
                        throw (new DataflowException("Unsupported data type " + vector.getField().toString() + " when converting back to Texera table."));
                }
            } catch (IllegalStateException e) {
                // An exception without a message cannot be the "null value" signal; guarding
                // here avoids a NullPointerException that would hide the original failure.
                String message = e.getMessage();
                if (message == null || !message.contains("Value at index is null")) {
                    throw new DataflowException(e);
                }
                switch (vector.getField().getFieldType().getType().getTypeID()) {
                    case Int:
                        texeraField = new IntegerField(null);
                        break;
                    case FloatingPoint:
                        texeraField = new DoubleField(null);
                        break;
                    case Date:
                        texeraField = new DateField((String) null);
                        break;
                    case Struct:
                        texeraField = new DateTimeField((String) null);
                        break;
                    case List:
                        texeraField = new ListField<Span>(null);
                        break;
                    default:
                        break;
                }
            }
            texeraFields.add(texeraField);
        }
        resultQueue.add(new Tuple(texeraSchema, texeraFields));
    }
}
use of org.apache.arrow.vector.complex.ListVector in project textdb by TextDB.
the class NltkSentimentOperator method vectorizeTupleToPython.
/**
 * Writes the non-null fields of {@code tuple} into row {@code index} of the Arrow vectors held
 * by {@code schemaRoot}, looking each vector up by attribute name.
 *
 * <p>Attributes whose value is {@code null} are skipped, as are attribute types without a case
 * below. BOOLEAN, TEXT, STRING and _ID_TYPE values are all written as the UTF-8 bytes of
 * {@code toString()}. LIST values are only supported when they hold {@link Span} elements.
 *
 * @param tuple the tuple to copy values from
 * @param index the row position to write in every vector
 * @param schemaRoot the Arrow batch providing the target vectors
 * @throws DataflowException if a non-empty LIST value's first element is not a {@link Span}
 */
private void vectorizeTupleToPython(Tuple tuple, int index, VectorSchemaRoot schemaRoot) {
    for (Attribute a : tuple.getSchema().getAttributes()) {
        String name = a.getName();
        // Look the value up once; every branch below works on this same object.
        Object value = tuple.getField(name).getValue();
        // When it is null, skip it.
        if (value == null) {
            continue;
        }
        switch (a.getType()) {
            case INTEGER:
                ((IntVector) schemaRoot.getVector(name)).setSafe(index, (int) value);
                break;
            case DOUBLE:
                ((Float8Vector) schemaRoot.getVector(name)).setSafe(index, (double) value);
                break;
            case BOOLEAN:
                // Falls through: booleans are written in their string form.
            case TEXT:
            case STRING:
            case _ID_TYPE:
                ((VarCharVector) schemaRoot.getVector(name)).setSafe(index, value.toString().getBytes(StandardCharsets.UTF_8));
                break;
            case DATE:
                ((DateDayVector) schemaRoot.getVector(name)).setSafe(index, (int) ((LocalDate) value).toEpochDay());
                break;
            case DATETIME: {
                // The value is known to be non-null here (nulls were skipped above), so the
                // struct slot is always marked defined.
                StructVector dateTimeStructs = ((StructVector) schemaRoot.getVector(name));
                dateTimeStructs.setIndexDefined(index);
                DateDayVector subVectorDay = (DateDayVector) dateTimeStructs.getVectorById(0);
                TimeSecVector subVectorTime = (TimeSecVector) dateTimeStructs.getVectorById(1);
                LocalDateTime dateTime = (LocalDateTime) value;
                subVectorDay.setSafe(index, (int) dateTime.toLocalDate().toEpochDay());
                subVectorTime.setSafe(index, dateTime.toLocalTime().toSecondOfDay());
                break;
            }
            case LIST: {
                // For now only supporting span. The element type is probed on the first
                // element, so only probe when there is one; an empty list is handed on as-is
                // instead of failing with IndexOutOfBoundsException.
                // NOTE(review): assumes convertListOfSpans accepts an empty list - confirm.
                ImmutableList<?> elements = (ImmutableList<?>) value;
                if (!elements.isEmpty() && elements.get(0).getClass() != Span.class) {
                    throw (new DataflowException("Unsupported Element Type for List Field!"));
                }
                ListVector listVector = (ListVector) schemaRoot.getVector(name);
                @SuppressWarnings("unchecked") // element type verified (or list empty) above
                ImmutableList<Span> spansList = (ImmutableList<Span>) elements;
                convertListOfSpans(spansList, listVector, index, name);
                break;
            }
            default:
                break;
        }
    }
}
use of org.apache.arrow.vector.complex.ListVector in project flink by apache.
the class ArrayWriter method doWrite.
/**
 * Writes the array located at {@code ordinal} of {@code in} into the underlying
 * {@link ListVector} at the current count position, delegating each element to
 * {@code elementWriter}. Nothing is written when the value at {@code ordinal} is null.
 *
 * @param in the container the array is read from
 * @param ordinal the position of the array inside {@code in}
 */
@Override
public void doWrite(T in, int ordinal) {
    if (isNullAt(in, ordinal)) {
        return;
    }
    ((ListVector) getValueVector()).startNewValue(getCount());
    final ArrayData elements = readArray(in, ordinal);
    int position = 0;
    while (position < elements.size()) {
        elementWriter.write(elements, position);
        position++;
    }
    ((ListVector) getValueVector()).endValue(getCount(), elements.size());
}
use of org.apache.arrow.vector.complex.ListVector in project flink by apache.
the class ArrowUtils method createColumnVector.
/**
 * Wraps an Arrow vector in the matching {@link ColumnVector} implementation, dispatching on
 * the concrete vector class.
 *
 * <p>List vectors recurse on their data vector using the array's element type; struct vectors
 * recurse on each child using the corresponding row field type. Timestamp vectors are only
 * handled when their Arrow type carries no timezone.
 *
 * @param vector the Arrow vector to wrap
 * @param fieldType the logical type describing {@code vector}; consulted for array element
 *     and row field types
 * @return a column vector backed by {@code vector}
 * @throws UnsupportedOperationException if no column vector exists for the vector class
 */
public static ColumnVector createColumnVector(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return new ArrowTinyIntColumnVector((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return new ArrowSmallIntColumnVector((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return new ArrowIntColumnVector((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return new ArrowBigIntColumnVector((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return new ArrowBooleanColumnVector((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return new ArrowFloatColumnVector((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return new ArrowDoubleColumnVector((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return new ArrowVarCharColumnVector((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return new ArrowVarBinaryColumnVector((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        return new ArrowDecimalColumnVector((DecimalVector) vector);
    }
    if (vector instanceof DateDayVector) {
        return new ArrowDateColumnVector((DateDayVector) vector);
    }
    if (vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector) {
        return new ArrowTimeColumnVector(vector);
    }
    // Only timestamps without a timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        return new ArrowTimestampColumnVector(vector);
    }
    if (vector instanceof ListVector) {
        ListVector nestedList = (ListVector) vector;
        ColumnVector elementColumn =
                createColumnVector(nestedList.getDataVector(), ((ArrayType) fieldType).getElementType());
        return new ArrowArrayColumnVector(nestedList, elementColumn);
    }
    if (vector instanceof StructVector) {
        StructVector struct = (StructVector) vector;
        ColumnVector[] childColumns = new ColumnVector[struct.size()];
        for (int child = 0; child < childColumns.length; ++child) {
            childColumns[child] =
                    createColumnVector(struct.getVectorById(child), ((RowType) fieldType).getTypeAt(child));
        }
        return new ArrowRowColumnVector(struct, childColumns);
    }
    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Aggregations