Usage of org.apache.flink.table.data.columnar.ColumnarArrayData in the Apache Flink project.
From the class VectorizedColumnBatchTest, method testTyped.
/**
 * Builds one column vector per supported type, each filled with values derived from the row
 * index, wraps them in a {@link VectorizedColumnBatch}, and checks that {@link ColumnarRowData}
 * reads back the expected value for every row and column.
 *
 * @throws IOException declared because {@code OutputStream.write(byte[])} is used to assemble
 *     the backing buffer of the 12-byte timestamp column
 */
@Test
public void testTyped() throws IOException {
    // Column 0: booleans, true on even rows.
    HeapBooleanVector col0 = new HeapBooleanVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col0.vector[i] = i % 2 == 0;
    }

    // Column 1: the row index rendered as a UTF-8 string.
    HeapBytesVector col1 = new HeapBytesVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        byte[] bytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
        col1.appendBytes(i, bytes, 0, bytes.length);
    }

    // Columns 2-7: the row index stored in each primitive numeric type.
    HeapByteVector col2 = new HeapByteVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col2.vector[i] = (byte) i;
    }
    HeapDoubleVector col3 = new HeapDoubleVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col3.vector[i] = i;
    }
    HeapFloatVector col4 = new HeapFloatVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col4.vector[i] = i;
    }
    HeapIntVector col5 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col5.vector[i] = i;
    }
    HeapLongVector col6 = new HeapLongVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col6.vector[i] = i;
    }
    HeapShortVector col7 = new HeapShortVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col7.vector[i] = (short) i;
    }

    // The precision of Timestamp in parquet should be one of MILLIS, MICROS or NANOS.
    // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp
    //
    // For MILLIS, the underlying INT64 holds milliseconds
    // For MICROS, the underlying INT64 holds microseconds
    // For NANOS, the underlying INT96 holds nanoOfDay(8 bytes) and julianDay(4 bytes)

    // Column 8: MILLIS-style timestamp; row i holds i milliseconds.
    long[] vector8 = new long[VECTOR_SIZE];
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector8[i] = i;
    }
    TimestampColumnVector col8 = new TimestampColumnVector() {
        @Override
        public boolean isNullAt(int i) {
            return false;
        }

        @Override
        public TimestampData getTimestamp(int i, int precision) {
            return TimestampData.fromEpochMillis(vector8[i]);
        }
    };

    // Column 9: MICROS-style timestamp; row i holds i * 1000 microseconds (= i milliseconds).
    long[] vector9 = new long[VECTOR_SIZE];
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector9[i] = i * 1000;
    }
    TimestampColumnVector col9 = new TimestampColumnVector() {
        @Override
        public TimestampData getTimestamp(int i, int precision) {
            long microseconds = vector9[i];
            // Split into whole milliseconds plus the leftover microseconds expressed in nanos.
            return TimestampData.fromEpochMillis(
                    microseconds / 1000, (int) (microseconds % 1000) * 1000);
        }

        @Override
        public boolean isNullAt(int i) {
            return false;
        }
    };

    // Column 10: NANOS-style timestamp stored as 12 bytes per row. As written here, bytes 0-7
    // are nanoOfDay and bytes 8-11 are the Julian day, both most-significant byte first.
    HeapBytesVector vector10 = new HeapBytesVector(VECTOR_SIZE);
    {
        int nanosecond = 123456789;
        int start = 0;
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; i < VECTOR_SIZE; i++) {
            byte[] bytes = new byte[12];
            // i means second
            long l = i * 1000000000L + nanosecond;
            for (int j = 0; j < 8; j++) {
                bytes[7 - j] = (byte) l;
                l >>>= 8;
            }
            // Epoch Julian
            int n = 2440588;
            for (int j = 0; j < 4; j++) {
                bytes[11 - j] = (byte) n;
                n >>>= 8;
            }
            vector10.start[i] = start;
            vector10.length[i] = 12;
            start += 12;
            out.write(bytes);
        }
        vector10.buffer = out.toByteArray();
    }
    TimestampColumnVector col10 = new TimestampColumnVector() {
        @Override
        public TimestampData getTimestamp(int colId, int precision) {
            byte[] bytes = vector10.getBytes(colId).getBytes();
            assert bytes.length == 12;
            // Reassemble the two fields written above; the 0xff mask undoes sign extension.
            long nanoOfDay = 0;
            for (int i = 0; i < 8; i++) {
                nanoOfDay <<= 8;
                nanoOfDay |= (bytes[i] & (0xff));
            }
            int julianDay = 0;
            for (int i = 8; i < 12; i++) {
                julianDay <<= 8;
                julianDay |= (bytes[i] & (0xff));
            }
            long millisecond =
                    (julianDay - DateTimeUtils.EPOCH_JULIAN) * DateTimeUtils.MILLIS_PER_DAY
                            + nanoOfDay / 1000000;
            int nanoOfMillisecond = (int) (nanoOfDay % 1000000);
            return TimestampData.fromEpochMillis(millisecond, nanoOfMillisecond);
        }

        @Override
        public boolean isNullAt(int i) {
            return false;
        }
    };

    // Column 11: decimals whose unscaled value is the row index.
    long[] vector11 = new long[VECTOR_SIZE];
    DecimalColumnVector col11 = new DecimalColumnVector() {
        @Override
        public boolean isNullAt(int i) {
            return false;
        }

        @Override
        public DecimalData getDecimal(int i, int precision, int scale) {
            return DecimalData.fromUnscaledLong(vector11[i], precision, scale);
        }
    };
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector11[i] = i;
    }

    // Column 12: int arrays; row i is the slice [i * ARRAY_SIZE, (i + 1) * ARRAY_SIZE) of a
    // single shared vector holding 0, 1, 2, ...
    HeapIntVector col12Data = new HeapIntVector(VECTOR_SIZE * ARRAY_SIZE);
    for (int i = 0; i < VECTOR_SIZE * ARRAY_SIZE; i++) {
        col12Data.vector[i] = i;
    }
    ArrayColumnVector col12 = new ArrayColumnVector() {
        @Override
        public boolean isNullAt(int i) {
            return false;
        }

        @Override
        public ArrayData getArray(int i) {
            return new ColumnarArrayData(col12Data, i * ARRAY_SIZE, ARRAY_SIZE);
        }
    };

    VectorizedColumnBatch batch =
            new VectorizedColumnBatch(
                    new ColumnVector[] {
                        col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10,
                        col11, col12
                    });
    batch.setNumRows(VECTOR_SIZE);

    // Check the row count first so a wrong count is reported as such instead of surfacing as
    // a vacuously passing loop or an index error inside it.
    assertEquals(VECTOR_SIZE, batch.getNumRows());

    // All assertions use JUnit's (expected, actual) order so failure messages read correctly.
    for (int i = 0; i < batch.getNumRows(); i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        assertEquals(i % 2 == 0, row.getBoolean(0));
        assertEquals(String.valueOf(i), row.getString(1).toString());
        assertEquals((byte) i, row.getByte(2));
        assertEquals(i, row.getDouble(3), 0);
        assertEquals((float) i, row.getFloat(4), 0);
        assertEquals(i, row.getInt(5));
        assertEquals(i, row.getLong(6));
        assertEquals((short) i, row.getShort(7));
        assertEquals(i, row.getTimestamp(8, 3).getMillisecond());
        assertEquals(i, row.getTimestamp(9, 6).getMillisecond());

        // Row i encodes i seconds + 123456789 ns: 123 ms go to the millisecond part and the
        // remaining 456789 ns to the nano-of-millisecond part.
        TimestampData nanosTimestamp = row.getTimestamp(10, 9);
        assertEquals(i * 1000L + 123, nanosTimestamp.getMillisecond());
        assertEquals(456789, nanosTimestamp.getNanoOfMillisecond());

        assertEquals(i, row.getDecimal(11, 10, 0).toUnscaledLong());

        ArrayData array = row.getArray(12);
        for (int j = 0; j < ARRAY_SIZE; j++) {
            assertEquals(i * ARRAY_SIZE + j, array.getInt(j));
        }
    }
}
Aggregations