Use of org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector in project flink by apache.
The class ColumnVectorTest, method testNulls.
@Test
public void testNulls() {
    HeapBooleanVector vector = new HeapBooleanVector(SIZE);
    for (int i = 0; i < SIZE; i++) {
        if (i % 2 == 0) {
            vector.setNullAt(i);
        }
    }
    for (int i = 0; i < SIZE; i++) {
        if (i % 2 == 0) {
            assertTrue(vector.isNullAt(i));
        } else {
            assertFalse(vector.isNullAt(i));
        }
    }
    vector.fillWithNulls();
    for (int i = 0; i < SIZE; i++) {
        assertTrue(vector.isNullAt(i));
    }
    vector.reset();
    for (int i = 0; i < SIZE; i++) {
        assertFalse(vector.isNullAt(i));
    }
    vector.setNulls(0, SIZE / 2);
    for (int i = 0; i < SIZE / 2; i++) {
        assertTrue(vector.isNullAt(i));
    }
}
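The test walks the full null lifecycle: per-slot nulls via setNullAt, bulk nulls via fillWithNulls, a clean slate via reset, and range nulls via setNulls. A minimal sketch of the read-side pattern this enables, assuming a vector populated as above (the readSafely helper is hypothetical, not part of Flink):

// Hypothetical helper: consult the null bitmap before reading the value,
// returning null instead of a stale slot.
static Boolean readSafely(HeapBooleanVector vector, int i) {
    return vector.isNullAt(i) ? null : vector.getBoolean(i);
}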
Use of org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector in project flink by apache.
The class ParquetSplitReaderUtil, method createVectorFromConstant.
public static ColumnVector createVectorFromConstant(LogicalType type, Object value, int batchSize) {
    switch (type.getTypeRoot()) {
        case CHAR:
        case VARCHAR:
        case BINARY:
        case VARBINARY:
            HeapBytesVector bsv = new HeapBytesVector(batchSize);
            if (value == null) {
                bsv.fillWithNulls();
            } else {
                bsv.fill(
                        value instanceof byte[]
                                ? (byte[]) value
                                : value.toString().getBytes(StandardCharsets.UTF_8));
            }
            return bsv;
        case BOOLEAN:
            HeapBooleanVector bv = new HeapBooleanVector(batchSize);
            if (value == null) {
                bv.fillWithNulls();
            } else {
                bv.fill((boolean) value);
            }
            return bv;
        case TINYINT:
            HeapByteVector byteVector = new HeapByteVector(batchSize);
            if (value == null) {
                byteVector.fillWithNulls();
            } else {
                byteVector.fill(((Number) value).byteValue());
            }
            return byteVector;
        case SMALLINT:
            HeapShortVector sv = new HeapShortVector(batchSize);
            if (value == null) {
                sv.fillWithNulls();
            } else {
                sv.fill(((Number) value).shortValue());
            }
            return sv;
        case INTEGER:
            HeapIntVector iv = new HeapIntVector(batchSize);
            if (value == null) {
                iv.fillWithNulls();
            } else {
                iv.fill(((Number) value).intValue());
            }
            return iv;
        case BIGINT:
            HeapLongVector lv = new HeapLongVector(batchSize);
            if (value == null) {
                lv.fillWithNulls();
            } else {
                lv.fill(((Number) value).longValue());
            }
            return lv;
        case DECIMAL:
            DecimalType decimalType = (DecimalType) type;
            int precision = decimalType.getPrecision();
            int scale = decimalType.getScale();
            DecimalData decimal =
                    value == null
                            ? null
                            : Preconditions.checkNotNull(
                                    DecimalData.fromBigDecimal(
                                            (BigDecimal) value, precision, scale));
            ColumnVector internalVector;
            if (ParquetSchemaConverter.is32BitDecimal(precision)) {
                internalVector =
                        createVectorFromConstant(
                                new IntType(),
                                decimal == null ? null : (int) decimal.toUnscaledLong(),
                                batchSize);
            } else if (ParquetSchemaConverter.is64BitDecimal(precision)) {
                internalVector =
                        createVectorFromConstant(
                                new BigIntType(),
                                decimal == null ? null : decimal.toUnscaledLong(),
                                batchSize);
            } else {
                internalVector =
                        createVectorFromConstant(
                                new VarBinaryType(),
                                decimal == null ? null : decimal.toUnscaledBytes(),
                                batchSize);
            }
            return new ParquetDecimalVector(internalVector);
        case FLOAT:
            HeapFloatVector fv = new HeapFloatVector(batchSize);
            if (value == null) {
                fv.fillWithNulls();
            } else {
                fv.fill(((Number) value).floatValue());
            }
            return fv;
        case DOUBLE:
            HeapDoubleVector dv = new HeapDoubleVector(batchSize);
            if (value == null) {
                dv.fillWithNulls();
            } else {
                dv.fill(((Number) value).doubleValue());
            }
            return dv;
        case DATE:
            if (value instanceof LocalDate) {
                value = Date.valueOf((LocalDate) value);
            }
            return createVectorFromConstant(
                    new IntType(), value == null ? null : toInternal((Date) value), batchSize);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            HeapTimestampVector tv = new HeapTimestampVector(batchSize);
            if (value == null) {
                tv.fillWithNulls();
            } else {
                tv.fill(TimestampData.fromLocalDateTime((LocalDateTime) value));
            }
            return tv;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
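A minimal sketch of calling this factory, for instance to materialize a constant column such as a partition value; the variable names are illustrative and the batch size is arbitrary:

// Every one of the 1024 rows reads back the constant 42.
ColumnVector intConst = createVectorFromConstant(new IntType(), 42, 1024);

// A null constant yields a vector in which every row is null.
ColumnVector allNulls = createVectorFromConstant(new VarCharType(), null, 1024);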
Use of org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector in project flink by apache.
The class ColumnVectorTest, method testBoolean.
@Test
public void testBoolean() {
    HeapBooleanVector vector = new HeapBooleanVector(SIZE);
    for (int i = 0; i < SIZE; i++) {
        vector.setBoolean(i, i % 2 == 0);
    }
    for (int i = 0; i < SIZE; i++) {
        assertEquals(i % 2 == 0, vector.getBoolean(i));
    }
    vector.fill(true);
    for (int i = 0; i < SIZE; i++) {
        assertTrue(vector.getBoolean(i));
    }
}
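The test drives writes through the typed setter and the bulk fill. For reference, a minimal sketch of the three write paths that appear across these snippets (the flags variable is illustrative):

HeapBooleanVector flags = new HeapBooleanVector(8);
flags.setBoolean(0, true); // per-slot write through the typed setter
flags.vector[1] = true;    // direct write to the public backing array (as in testTyped below)
flags.fill(true);          // bulk write: every slot becomes true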
Use of org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector in project flink by apache.
The class VectorizedColumnBatchTest, method testTyped.
@Test
public void testTyped() throws IOException {
    HeapBooleanVector col0 = new HeapBooleanVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col0.vector[i] = i % 2 == 0;
    }

    HeapBytesVector col1 = new HeapBytesVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        byte[] bytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
        col1.appendBytes(i, bytes, 0, bytes.length);
    }

    HeapByteVector col2 = new HeapByteVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col2.vector[i] = (byte) i;
    }

    HeapDoubleVector col3 = new HeapDoubleVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col3.vector[i] = i;
    }

    HeapFloatVector col4 = new HeapFloatVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col4.vector[i] = i;
    }

    HeapIntVector col5 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col5.vector[i] = i;
    }

    HeapLongVector col6 = new HeapLongVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col6.vector[i] = i;
    }

    HeapShortVector col7 = new HeapShortVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col7.vector[i] = (short) i;
    }

    // The precision of Timestamp in parquet should be one of MILLIS, MICROS or NANOS.
    // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp
    //
    // For MILLIS, the underlying INT64 holds milliseconds.
    // For MICROS, the underlying INT64 holds microseconds.
    // For NANOS, the underlying INT96 holds nanoOfDay (8 bytes) and julianDay (4 bytes).
    long[] vector8 = new long[VECTOR_SIZE];
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector8[i] = i;
    }
    TimestampColumnVector col8 =
            new TimestampColumnVector() {
                @Override
                public boolean isNullAt(int i) {
                    return false;
                }

                @Override
                public TimestampData getTimestamp(int i, int precision) {
                    return TimestampData.fromEpochMillis(vector8[i]);
                }
            };

    long[] vector9 = new long[VECTOR_SIZE];
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector9[i] = i * 1000;
    }
    TimestampColumnVector col9 =
            new TimestampColumnVector() {
                @Override
                public TimestampData getTimestamp(int i, int precision) {
                    long microseconds = vector9[i];
                    return TimestampData.fromEpochMillis(
                            microseconds / 1000, (int) (microseconds % 1000) * 1000);
                }

                @Override
                public boolean isNullAt(int i) {
                    return false;
                }
            };

    HeapBytesVector vector10 = new HeapBytesVector(VECTOR_SIZE);
    {
        int nanosecond = 123456789;
        int start = 0;
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; i < VECTOR_SIZE; i++) {
            byte[] bytes = new byte[12];
            // i means second
            long l = i * 1000000000L + nanosecond;
            for (int j = 0; j < 8; j++) {
                bytes[7 - j] = (byte) l;
                l >>>= 8;
            }
            // Epoch Julian
            int n = 2440588;
            for (int j = 0; j < 4; j++) {
                bytes[11 - j] = (byte) n;
                n >>>= 8;
            }
            vector10.start[i] = start;
            vector10.length[i] = 12;
            start += 12;
            out.write(bytes);
        }
        vector10.buffer = out.toByteArray();
    }
    TimestampColumnVector col10 =
            new TimestampColumnVector() {
                @Override
                public TimestampData getTimestamp(int colId, int precision) {
                    byte[] bytes = vector10.getBytes(colId).getBytes();
                    assert bytes.length == 12;
                    long nanoOfDay = 0;
                    for (int i = 0; i < 8; i++) {
                        nanoOfDay <<= 8;
                        nanoOfDay |= (bytes[i] & (0xff));
                    }
                    int julianDay = 0;
                    for (int i = 8; i < 12; i++) {
                        julianDay <<= 8;
                        julianDay |= (bytes[i] & (0xff));
                    }
                    long millisecond =
                            (julianDay - DateTimeUtils.EPOCH_JULIAN) * DateTimeUtils.MILLIS_PER_DAY
                                    + nanoOfDay / 1000000;
                    int nanoOfMillisecond = (int) (nanoOfDay % 1000000);
                    return TimestampData.fromEpochMillis(millisecond, nanoOfMillisecond);
                }

                @Override
                public boolean isNullAt(int i) {
                    return false;
                }
            };

    long[] vector11 = new long[VECTOR_SIZE];
    DecimalColumnVector col11 =
            new DecimalColumnVector() {
                @Override
                public boolean isNullAt(int i) {
                    return false;
                }

                @Override
                public DecimalData getDecimal(int i, int precision, int scale) {
                    return DecimalData.fromUnscaledLong(vector11[i], precision, scale);
                }
            };
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector11[i] = i;
    }

    HeapIntVector col12Data = new HeapIntVector(VECTOR_SIZE * ARRAY_SIZE);
    for (int i = 0; i < VECTOR_SIZE * ARRAY_SIZE; i++) {
        col12Data.vector[i] = i;
    }
    ArrayColumnVector col12 =
            new ArrayColumnVector() {
                @Override
                public boolean isNullAt(int i) {
                    return false;
                }

                @Override
                public ArrayData getArray(int i) {
                    return new ColumnarArrayData(col12Data, i * ARRAY_SIZE, ARRAY_SIZE);
                }
            };

    VectorizedColumnBatch batch =
            new VectorizedColumnBatch(
                    new ColumnVector[] {
                        col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10,
                        col11, col12
                    });
    batch.setNumRows(VECTOR_SIZE);

    for (int i = 0; i < batch.getNumRows(); i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        assertEquals(row.getBoolean(0), i % 2 == 0);
        assertEquals(row.getString(1).toString(), String.valueOf(i));
        assertEquals(row.getByte(2), (byte) i);
        assertEquals(row.getDouble(3), i, 0);
        assertEquals(row.getFloat(4), (float) i, 0);
        assertEquals(row.getInt(5), i);
        assertEquals(row.getLong(6), i);
        assertEquals(row.getShort(7), (short) i);
        assertEquals(row.getTimestamp(8, 3).getMillisecond(), i);
        assertEquals(row.getTimestamp(9, 6).getMillisecond(), i);
        assertEquals(row.getTimestamp(10, 9).getMillisecond(), i * 1000L + 123);
        assertEquals(row.getTimestamp(10, 9).getNanoOfMillisecond(), 456789);
        assertEquals(row.getDecimal(11, 10, 0).toUnscaledLong(), i);
        for (int j = 0; j < ARRAY_SIZE; j++) {
            assertEquals(row.getArray(12).getInt(j), i * ARRAY_SIZE + j);
        }
    }
    assertEquals(VECTOR_SIZE, batch.getNumRows());
}
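The INT96 decoding inside col10 can be read as a standalone conversion. A minimal sketch of the same arithmetic as a helper, assuming DateTimeUtils.EPOCH_JULIAN (2440588) and DateTimeUtils.MILLIS_PER_DAY (86400000) as used in the test; the fromInt96 name is illustrative:

// Convert a Parquet INT96 timestamp (julianDay, nanoOfDay) to TimestampData,
// mirroring col10.getTimestamp above: days since the Julian epoch give the
// millisecond base, nanoOfDay supplies the rest.
static TimestampData fromInt96(int julianDay, long nanoOfDay) {
    long millisecond =
            (julianDay - DateTimeUtils.EPOCH_JULIAN) * DateTimeUtils.MILLIS_PER_DAY
                    + nanoOfDay / 1_000_000;
    int nanoOfMillisecond = (int) (nanoOfDay % 1_000_000);
    return TimestampData.fromEpochMillis(millisecond, nanoOfMillisecond);
}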