use of org.apache.arrow.vector.BitVector in project flink by apache.
the class ArrowUtils method createArrowFieldWriterForArray.
/**
 * Picks the {@code forArray} field writer that matches the concrete class of the given Arrow
 * vector.
 *
 * <p>List vectors recurse into this method for their data vector, while struct vectors build
 * one child writer per field through {@code createArrowFieldWriterForRow}.
 *
 * @param vector the Arrow vector the returned writer is created for
 * @param fieldType the logical type paired with {@code vector}; it is consulted for the
 *     timestamp precision, the list element type and the struct field types
 * @return the writer created for {@code vector}
 * @throws UnsupportedOperationException if no branch matches {@code vector}
 */
private static ArrowFieldWriter<ArrayData> createArrowFieldWriterForArray(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forArray((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forArray((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forArray((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forArray((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forArray((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forArray((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forArray((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forArray((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forArray((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        final DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forArray(decimals, getPrecision(decimals), decimals.getScale());
    }
    if (vector instanceof DateDayVector) {
        return DateWriter.forArray((DateDayVector) vector);
    }
    // All four time resolutions share a single writer.
    final boolean isTimeVector =
            vector instanceof TimeSecVector
                    || vector instanceof TimeMilliVector
                    || vector instanceof TimeMicroVector
                    || vector instanceof TimeNanoVector;
    if (isTimeVector) {
        return TimeWriter.forArray(vector);
    }
    // Only timestamp vectors whose Arrow field carries no timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        final int precision =
                fieldType instanceof LocalZonedTimestampType
                        ? ((LocalZonedTimestampType) fieldType).getPrecision()
                        : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forArray(vector, precision);
    }
    if (vector instanceof ListVector) {
        final ListVector listVector = (ListVector) vector;
        final LogicalType elementType = ((ArrayType) fieldType).getElementType();
        final ArrowFieldWriter<ArrayData> elementWriter =
                createArrowFieldWriterForArray(listVector.getDataVector(), elementType);
        return ArrayWriter.forArray(listVector, elementWriter);
    }
    if (vector instanceof StructVector) {
        final RowType rowType = (RowType) fieldType;
        final StructVector structVector = (StructVector) vector;
        final int fieldCount = rowType.getFieldCount();
        final ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[fieldCount];
        for (int pos = 0; pos < fieldCount; pos++) {
            childWriters[pos] =
                    createArrowFieldWriterForRow(structVector.getVectorById(pos), rowType.getTypeAt(pos));
        }
        return RowWriter.forArray(structVector, childWriters);
    }
    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
use of org.apache.arrow.vector.BitVector in project flink by apache.
the class ArrowUtils method createColumnVector.
/**
 * Wraps the given Arrow vector in the {@link ColumnVector} implementation that matches its
 * concrete class.
 *
 * <p>List and struct vectors are handled recursively: the list's data vector and each struct
 * child are wrapped by calling this method again with the corresponding nested logical type.
 *
 * @param vector the Arrow vector to wrap
 * @param fieldType the logical type paired with {@code vector}; only consulted for list
 *     element types and struct field types
 * @return the column vector wrapping {@code vector}
 * @throws UnsupportedOperationException if no branch matches {@code vector}
 */
public static ColumnVector createColumnVector(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return new ArrowTinyIntColumnVector((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return new ArrowSmallIntColumnVector((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return new ArrowIntColumnVector((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return new ArrowBigIntColumnVector((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return new ArrowBooleanColumnVector((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return new ArrowFloatColumnVector((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return new ArrowDoubleColumnVector((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return new ArrowVarCharColumnVector((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return new ArrowVarBinaryColumnVector((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        return new ArrowDecimalColumnVector((DecimalVector) vector);
    }
    if (vector instanceof DateDayVector) {
        return new ArrowDateColumnVector((DateDayVector) vector);
    }
    // All four time resolutions share a single column vector implementation.
    final boolean isTimeVector =
            vector instanceof TimeSecVector
                    || vector instanceof TimeMilliVector
                    || vector instanceof TimeMicroVector
                    || vector instanceof TimeNanoVector;
    if (isTimeVector) {
        return new ArrowTimeColumnVector(vector);
    }
    // Only timestamp vectors whose Arrow field carries no timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        return new ArrowTimestampColumnVector(vector);
    }
    if (vector instanceof ListVector) {
        final ListVector listVector = (ListVector) vector;
        final ValueVector elementVector = listVector.getDataVector();
        final LogicalType elementType = ((ArrayType) fieldType).getElementType();
        return new ArrowArrayColumnVector(listVector, createColumnVector(elementVector, elementType));
    }
    if (vector instanceof StructVector) {
        final StructVector structVector = (StructVector) vector;
        final ColumnVector[] childColumns = new ColumnVector[structVector.size()];
        for (int child = 0; child < childColumns.length; child++) {
            final ValueVector childVector = structVector.getVectorById(child);
            final LogicalType childType = ((RowType) fieldType).getTypeAt(child);
            childColumns[child] = createColumnVector(childVector, childType);
        }
        return new ArrowRowColumnVector(structVector, childColumns);
    }
    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
use of org.apache.arrow.vector.BitVector in project flink by apache.
the class ArrowUtils method createArrowFieldWriterForRow.
/**
 * Picks the {@code forRow} field writer that matches the concrete class of the given Arrow
 * vector.
 *
 * <p>List vectors obtain their element writer from {@code createArrowFieldWriterForArray};
 * struct vectors recurse into this method once per child vector.
 *
 * @param vector the Arrow vector the returned writer is created for
 * @param fieldType the logical type paired with {@code vector}; it is consulted for the
 *     timestamp precision, the list element type and the struct field types
 * @return the writer created for {@code vector}
 * @throws UnsupportedOperationException if no branch matches {@code vector}
 */
private static ArrowFieldWriter<RowData> createArrowFieldWriterForRow(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forRow((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forRow((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forRow((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forRow((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forRow((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forRow((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forRow((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forRow((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forRow((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        final DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forRow(decimals, getPrecision(decimals), decimals.getScale());
    }
    if (vector instanceof DateDayVector) {
        return DateWriter.forRow((DateDayVector) vector);
    }
    // All four time resolutions share a single writer.
    final boolean isTimeVector =
            vector instanceof TimeSecVector
                    || vector instanceof TimeMilliVector
                    || vector instanceof TimeMicroVector
                    || vector instanceof TimeNanoVector;
    if (isTimeVector) {
        return TimeWriter.forRow(vector);
    }
    // Only timestamp vectors whose Arrow field carries no timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        final int precision =
                fieldType instanceof LocalZonedTimestampType
                        ? ((LocalZonedTimestampType) fieldType).getPrecision()
                        : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forRow(vector, precision);
    }
    if (vector instanceof ListVector) {
        final ListVector listVector = (ListVector) vector;
        final LogicalType elementType = ((ArrayType) fieldType).getElementType();
        final ArrowFieldWriter<ArrayData> elementWriter =
                createArrowFieldWriterForArray(listVector.getDataVector(), elementType);
        return ArrayWriter.forRow(listVector, elementWriter);
    }
    if (vector instanceof StructVector) {
        final RowType rowType = (RowType) fieldType;
        final StructVector structVector = (StructVector) vector;
        final int fieldCount = rowType.getFieldCount();
        final ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[fieldCount];
        for (int pos = 0; pos < fieldCount; pos++) {
            childWriters[pos] =
                    createArrowFieldWriterForRow(structVector.getVectorById(pos), rowType.getTypeAt(pos));
        }
        return RowWriter.forRow(structVector, childWriters);
    }
    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
use of org.apache.arrow.vector.BitVector in project beam by apache.
the class ArrowConversionTest method rowIterator.
/**
 * Fills a {@link VectorSchemaRoot} with 16 rows spanning eight Arrow field types and checks
 * that {@code ArrowConversion.rowsFromRecordBatch} yields the equivalent Beam rows in order.
 *
 * <p>The schema root is opened in a try-with-resources block so that it is closed even when
 * the assertion fails.
 */
@Test
public void rowIterator() {
  org.apache.arrow.vector.types.pojo.Schema schema =
      new org.apache.arrow.vector.types.pojo.Schema(
          asList(
              field("int32", new ArrowType.Int(32, true)),
              field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
              field("string", new ArrowType.Utf8()),
              field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")),
              field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
              field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))),
              field("boolean", new ArrowType.Bool()),
              field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
  Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);

  try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
    expectedSchemaRoot.allocateNew();
    expectedSchemaRoot.setRowCount(16);

    // Field vectors are fetched by position, in the order the fields were declared above.
    IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
    Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
    VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
    TimeStampMicroTZVector timestampMicroUtcVector =
        (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
    TimeStampMilliTZVector timeStampMilliTZVector =
        (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
    ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
    IntVector int32ListElementVector =
        int32ListVector
            .<IntVector>addOrGetVector(
                new org.apache.arrow.vector.types.pojo.FieldType(
                    false, new ArrowType.Int(32, true), null))
            .getVector();
    BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
    FixedSizeBinaryVector fixedSizeBinaryVector =
        (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);

    ArrayList<Row> expectedRows = new ArrayList<>();
    for (int i = 0; i < 16; i++) {
      DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);
      expectedRows.add(
          Row.withSchema(beamSchema)
              .addValues(
                  i,
                  i + .1 * i,
                  "" + i,
                  dt,
                  dt,
                  ImmutableList.of(i),
                  (i % 2) != 0,
                  new byte[] {(byte) i, (byte) (i + 1), (byte) (i + 2)})
              .build());

      intVector.set(i, i);
      floatVector.set(i, i + .1 * i);
      strVector.set(i, new Text("" + i));
      // The microsecond vector receives the millisecond instant scaled by 1000.
      timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
      timeStampMilliTZVector.set(i, dt.getMillis());
      // Each list entry holds exactly one element, equal to the row index.
      int32ListVector.startNewValue(i);
      int32ListElementVector.set(i, i);
      int32ListVector.endValue(i, 1);
      boolVector.set(i, i % 2);
      fixedSizeBinaryVector.set(i, new byte[] {(byte) i, (byte) (i + 1), (byte) (i + 2)});
    }

    assertThat(
        ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)),
        IsIterableContainingInOrder.contains(
            expectedRows.stream()
                .map((row) -> equalTo(row))
                .collect(ImmutableList.toImmutableList())));
  }
}
use of org.apache.arrow.vector.BitVector in project hive by apache.
the class Deserializer method readPrimitive.
/**
 * Copies every value of a primitive-typed Arrow vector into the given Hive column vector.
 *
 * <p>The Arrow minor type selects the concrete Arrow vector class and the Hive column vector
 * class that both arguments are cast to. For each index, a null Arrow value is forwarded to
 * {@code VectorizedBatchUtil.setNullColIsNullValue}; otherwise the Hive null flag is cleared
 * and the value is written.
 *
 * @param arrowVector source vector; its value count determines how many entries are copied
 * @param hiveVector destination column vector
 * @throws IllegalArgumentException if the minor type of {@code arrowVector} has no branch here
 */
private void readPrimitive(FieldVector arrowVector, ColumnVector hiveVector) {
  final Types.MinorType minorType = arrowVector.getMinorType();
  final int size = arrowVector.getValueCount();
  switch (minorType) {
    case BIT:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((BitVector) arrowVector).get(i);
          }
        }
      }
      break;
    case TINYINT:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((TinyIntVector) arrowVector).get(i);
          }
        }
      }
      break;
    case SMALLINT:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((SmallIntVector) arrowVector).get(i);
          }
        }
      }
      break;
    case INT:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((IntVector) arrowVector).get(i);
          }
        }
      }
      break;
    case BIGINT:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((BigIntVector) arrowVector).get(i);
          }
        }
      }
      break;
    case FLOAT4:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((DoubleColumnVector) hiveVector).vector[i] = ((Float4Vector) arrowVector).get(i);
          }
        }
      }
      break;
    case FLOAT8:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((DoubleColumnVector) hiveVector).vector[i] = ((Float8Vector) arrowVector).get(i);
          }
        }
      }
      break;
    case VARCHAR:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((BytesColumnVector) hiveVector).setVal(i, ((VarCharVector) arrowVector).get(i));
          }
        }
      }
      break;
    case DATEDAY:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((DateDayVector) arrowVector).get(i);
          }
        }
      }
      break;
    case TIMESTAMPMILLI:
    case TIMESTAMPMILLITZ:
    case TIMESTAMPMICRO:
    case TIMESTAMPMICROTZ:
    case TIMESTAMPNANO:
    case TIMESTAMPNANOTZ:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;

            // Time = second + sub-second
            final long time = ((TimeStampVector) arrowVector).get(i);
            long second;
            int subSecondInNanos;
            // Split the raw value according to the unit implied by the minor type.
            switch (minorType) {
              case TIMESTAMPMILLI:
              case TIMESTAMPMILLITZ:
                {
                  subSecondInNanos = (int) ((time % MILLIS_PER_SECOND) * NS_PER_MILLIS);
                  second = time / MILLIS_PER_SECOND;
                }
                break;
              case TIMESTAMPMICROTZ:
              case TIMESTAMPMICRO:
                {
                  subSecondInNanos = (int) ((time % MICROS_PER_SECOND) * NS_PER_MICROS);
                  second = time / MICROS_PER_SECOND;
                }
                break;
              case TIMESTAMPNANOTZ:
              case TIMESTAMPNANO:
                {
                  subSecondInNanos = (int) (time % NS_PER_SECOND);
                  second = time / NS_PER_SECOND;
                }
                break;
              default:
                throw new IllegalArgumentException(
                    "Unexpected timestamp minor type: " + minorType);
            }

            final TimestampColumnVector timestampColumnVector = (TimestampColumnVector) hiveVector;
            // A nanosecond value should not be negative
            if (subSecondInNanos < 0) {
              // So add one second to the negative nanosecond value to make it positive
              subSecondInNanos += NS_PER_SECOND;
              // Subtract one second from the second value because we added one second
              second -= 1;
            }
            timestampColumnVector.time[i] = second * MILLIS_PER_SECOND;
            timestampColumnVector.nanos[i] = subSecondInNanos;
          }
        }
      }
      break;
    case VARBINARY:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((BytesColumnVector) hiveVector).setVal(i, ((VarBinaryVector) arrowVector).get(i));
          }
        }
      }
      break;
    case DECIMAL:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((DecimalColumnVector) hiveVector)
                .set(i, HiveDecimal.create(((DecimalVector) arrowVector).getObject(i)));
          }
        }
      }
      break;
    case INTERVALYEAR:
      {
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            ((LongColumnVector) hiveVector).vector[i] = ((IntervalYearVector) arrowVector).get(i);
          }
        }
      }
      break;
    case INTERVALDAY:
      {
        final IntervalDayVector intervalDayVector = (IntervalDayVector) arrowVector;
        // The holder and the interval object are reused across iterations.
        final NullableIntervalDayHolder intervalDayHolder = new NullableIntervalDayHolder();
        final HiveIntervalDayTime intervalDayTime = new HiveIntervalDayTime();
        for (int i = 0; i < size; i++) {
          if (arrowVector.isNull(i)) {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
          } else {
            hiveVector.isNull[i] = false;
            intervalDayVector.get(i, intervalDayHolder);
            // Widen to long before multiplying so a large day count cannot overflow int
            // arithmetic (assumes SECOND_PER_DAY may be an int constant; the cast is
            // harmless if it is already a long).
            final long seconds =
                (long) intervalDayHolder.days * SECOND_PER_DAY
                    + intervalDayHolder.milliseconds / MILLIS_PER_SECOND;
            final int nanos = (intervalDayHolder.milliseconds % 1_000) * NS_PER_MILLIS;
            intervalDayTime.set(seconds, nanos);
            ((IntervalDayTimeColumnVector) hiveVector).set(i, intervalDayTime);
          }
        }
      }
      break;
    default:
      throw new IllegalArgumentException("Unsupported Arrow minor type: " + minorType);
  }
}
Aggregations