Example 6 with VarBinaryType

Use of org.apache.flink.table.types.logical.VarBinaryType in project flink by apache.

From the class ArrowUtilsTest, method init():

@BeforeClass
public static void init() {
    testFields = new ArrayList<>();
    testFields.add(Tuple5.of("f1", new TinyIntType(), new ArrowType.Int(8, true), TinyIntWriter.TinyIntWriterForRow.class, ArrowTinyIntColumnVector.class));
    testFields.add(Tuple5.of("f2", new SmallIntType(), new ArrowType.Int(8 * 2, true), SmallIntWriter.SmallIntWriterForRow.class, ArrowSmallIntColumnVector.class));
    testFields.add(Tuple5.of("f3", new IntType(), new ArrowType.Int(8 * 4, true), IntWriter.IntWriterForRow.class, ArrowIntColumnVector.class));
    testFields.add(Tuple5.of("f4", new BigIntType(), new ArrowType.Int(8 * 8, true), BigIntWriter.BigIntWriterForRow.class, ArrowBigIntColumnVector.class));
    testFields.add(Tuple5.of("f5", new BooleanType(), new ArrowType.Bool(), BooleanWriter.BooleanWriterForRow.class, ArrowBooleanColumnVector.class));
    testFields.add(Tuple5.of("f6", new FloatType(), new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), FloatWriter.FloatWriterForRow.class, ArrowFloatColumnVector.class));
    testFields.add(Tuple5.of("f7", new DoubleType(), new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), DoubleWriter.DoubleWriterForRow.class, ArrowDoubleColumnVector.class));
    testFields.add(Tuple5.of("f8", new VarCharType(), ArrowType.Utf8.INSTANCE, VarCharWriter.VarCharWriterForRow.class, ArrowVarCharColumnVector.class));
    testFields.add(Tuple5.of("f9", new VarBinaryType(), ArrowType.Binary.INSTANCE, VarBinaryWriter.VarBinaryWriterForRow.class, ArrowVarBinaryColumnVector.class));
    testFields.add(Tuple5.of("f10", new DecimalType(10, 3), new ArrowType.Decimal(10, 3), DecimalWriter.DecimalWriterForRow.class, ArrowDecimalColumnVector.class));
    testFields.add(Tuple5.of("f11", new DateType(), new ArrowType.Date(DateUnit.DAY), DateWriter.DateWriterForRow.class, ArrowDateColumnVector.class));
    testFields.add(Tuple5.of("f13", new TimeType(0), new ArrowType.Time(TimeUnit.SECOND, 32), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f14", new TimeType(2), new ArrowType.Time(TimeUnit.MILLISECOND, 32), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f15", new TimeType(4), new ArrowType.Time(TimeUnit.MICROSECOND, 64), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f16", new TimeType(8), new ArrowType.Time(TimeUnit.NANOSECOND, 64), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f17", new LocalZonedTimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f18", new LocalZonedTimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f19", new LocalZonedTimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f20", new LocalZonedTimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f21", new TimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f22", new TimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f23", new TimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f24", new TimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f25", new ArrayType(new VarCharType()), ArrowType.List.INSTANCE, ArrayWriter.ArrayWriterForRow.class, ArrowArrayColumnVector.class));
    RowType rowFieldType = new RowType(Arrays.asList(
            new RowType.RowField("a", new IntType()),
            new RowType.RowField("b", new VarCharType()),
            new RowType.RowField("c", new ArrayType(new VarCharType())),
            new RowType.RowField("d", new TimestampType(2)),
            new RowType.RowField("e", new RowType(Arrays.asList(
                    new RowType.RowField("e1", new IntType()),
                    new RowType.RowField("e2", new VarCharType()))))));
    testFields.add(Tuple5.of("f26", rowFieldType, ArrowType.Struct.INSTANCE, RowWriter.RowWriterForRow.class, ArrowRowColumnVector.class));
    List<RowType.RowField> rowFields = new ArrayList<>();
    for (Tuple5<String, LogicalType, ArrowType, Class<?>, Class<?>> field : testFields) {
        rowFields.add(new RowType.RowField(field.f0, field.f1));
    }
    rowType = new RowType(rowFields);
    allocator = ArrowUtils.getRootAllocator().newChildAllocator("stdout", 0, Long.MAX_VALUE);
}
Also used : ArrowArrayColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector) ArrayList(java.util.ArrayList) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) FloatType(org.apache.flink.table.types.logical.FloatType) ArrayType(org.apache.flink.table.types.logical.ArrayType) DateType(org.apache.flink.table.types.logical.DateType) ArrowTimeColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimeColumnVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType) BeforeClass(org.junit.BeforeClass) ArrowTimestampColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTimestampColumnVector) VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) ArrowTinyIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowTinyIntColumnVector) ArrowIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowIntColumnVector) BigIntType(org.apache.flink.table.types.logical.BigIntType) RowType(org.apache.flink.table.types.logical.RowType) ArrowBooleanColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBooleanColumnVector) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) TimeType(org.apache.flink.table.types.logical.TimeType) ArrowDoubleColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDoubleColumnVector) ArrowDateColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDateColumnVector) ArrowSmallIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowSmallIntColumnVector) TimestampType(org.apache.flink.table.types.logical.TimestampType) ArrowVarBinaryColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarBinaryColumnVector) VarCharType(org.apache.flink.table.types.logical.VarCharType) ArrowDecimalColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowDecimalColumnVector) ArrowFloatColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowFloatColumnVector) BooleanType(org.apache.flink.table.types.logical.BooleanType) ArrowVarCharColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowVarCharColumnVector) DoubleType(org.apache.flink.table.types.logical.DoubleType) ArrowBigIntColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector) ArrowRowColumnVector(org.apache.flink.table.runtime.arrow.vectors.ArrowRowColumnVector)
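
Each entry above pairs a Flink logical type with the Arrow type, writer, and column vector the test expects; the VARBINARY field ("f9") is expected to map to ArrowType.Binary. Below is a minimal standalone sketch of that mapping, not taken from the test; it assumes the ArrowUtils.toArrowSchema(RowType) helper from the same package is available, and the class and field names are illustrative only.

import java.util.Arrays;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.flink.table.runtime.arrow.ArrowUtils;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarBinaryType;

public class VarBinaryArrowSchemaSketch {
    public static void main(String[] args) {
        // A row with an INT field and a VARBINARY field, mirroring "f3" and "f9" above.
        RowType rowType = new RowType(Arrays.asList(
                new RowType.RowField("id", new IntType()),
                new RowType.RowField("payload", new VarBinaryType(VarBinaryType.MAX_LENGTH))));
        // The VARBINARY field is expected to appear as ArrowType.Binary in the schema.
        Schema arrowSchema = ArrowUtils.toArrowSchema(rowType);
        System.out.println(arrowSchema);
    }
}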

Example 7 with VarBinaryType

Use of org.apache.flink.table.types.logical.VarBinaryType in project flink by apache.

From the class SortCodeGeneratorTest, method value2():

private Object value2(LogicalType type, Random rnd) {
    switch(type.getTypeRoot()) {
        case BOOLEAN:
            return false;
        case TINYINT:
            return (byte) 0;
        case SMALLINT:
            return (short) 0;
        case INTEGER:
            return 0;
        case BIGINT:
            return 0L;
        case FLOAT:
            return 0f;
        case DOUBLE:
            return 0d;
        case VARCHAR:
            return StringData.fromString("0");
        case DECIMAL:
            DecimalType decimalType = (DecimalType) type;
            return DecimalData.fromBigDecimal(new BigDecimal(0), decimalType.getPrecision(), decimalType.getScale());
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            return TimestampData.fromEpochMillis(0);
        case ARRAY:
        case VARBINARY:
            byte[] bytes = new byte[rnd.nextInt(7) + 10];
            rnd.nextBytes(bytes);
            return type instanceof VarBinaryType ? bytes : BinaryArrayData.fromPrimitiveArray(bytes);
        case ROW:
            RowType rowType = (RowType) type;
            if (rowType.getFields().get(0).getType().getTypeRoot() == INTEGER) {
                return GenericRowData.of(rnd.nextInt());
            } else {
                return GenericRowData.of(GenericRowData.of(new Object[] { null }));
            }
        case RAW:
            return RawValueData.fromObject(rnd.nextInt());
        default:
            throw new RuntimeException("Not support!");
    }
}
Also used : VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) BigDecimal(java.math.BigDecimal)
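
In the shared ARRAY/VARBINARY branch above, the same random bytes are returned either as a raw byte[] (for VARBINARY) or wrapped into a BinaryArrayData (for ARRAY). A small standalone sketch of that distinction follows; it is not part of SortCodeGeneratorTest, and the class name is made up for illustration.

import java.util.Random;
import org.apache.flink.table.data.binary.BinaryArrayData;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.TinyIntType;
import org.apache.flink.table.types.logical.VarBinaryType;

public class RandomBinarySketch {
    // Mirrors the ARRAY/VARBINARY branch: raw bytes for VARBINARY,
    // BinaryArrayData for ARRAY<TINYINT>.
    static Object randomBinaryOrArray(LogicalType type, Random rnd) {
        byte[] bytes = new byte[rnd.nextInt(7) + 10];
        rnd.nextBytes(bytes);
        return type instanceof VarBinaryType ? bytes : BinaryArrayData.fromPrimitiveArray(bytes);
    }

    public static void main(String[] args) {
        Random rnd = new Random(42);
        byte[] varBinary = (byte[]) randomBinaryOrArray(new VarBinaryType(), rnd);
        BinaryArrayData array = (BinaryArrayData) randomBinaryOrArray(new ArrayType(new TinyIntType()), rnd);
        System.out.println(varBinary.length + " bytes vs. array of size " + array.size());
    }
}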

Example 8 with VarBinaryType

Use of org.apache.flink.table.types.logical.VarBinaryType in project flink by apache.

From the class SortCodeGeneratorTest, method value3():

private Object value3(LogicalType type, Random rnd) {
    switch(type.getTypeRoot()) {
        case BOOLEAN:
            return true;
        case TINYINT:
            return Byte.MAX_VALUE;
        case SMALLINT:
            return Short.MAX_VALUE;
        case INTEGER:
            return Integer.MAX_VALUE;
        case BIGINT:
            return Long.MAX_VALUE;
        case FLOAT:
            return Float.MAX_VALUE;
        case DOUBLE:
            return Double.MAX_VALUE;
        case VARCHAR:
            return StringData.fromString(RandomStringUtils.random(100));
        case DECIMAL:
            DecimalType decimalType = (DecimalType) type;
            return DecimalData.fromBigDecimal(new BigDecimal(Integer.MAX_VALUE), decimalType.getPrecision(), decimalType.getScale());
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            return TimestampData.fromEpochMillis(Long.MAX_VALUE, 999999);
        case ARRAY:
        case VARBINARY:
            byte[] bytes = new byte[rnd.nextInt(100) + 100];
            rnd.nextBytes(bytes);
            return type instanceof VarBinaryType ? bytes : BinaryArrayData.fromPrimitiveArray(bytes);
        case ROW:
            RowType rowType = (RowType) type;
            if (rowType.getFields().get(0).getType().getTypeRoot() == INTEGER) {
                return GenericRowData.of(rnd.nextInt());
            } else {
                return GenericRowData.of(GenericRowData.of(rnd.nextInt()));
            }
        case RAW:
            return RawValueData.fromObject(rnd.nextInt());
        default:
            throw new RuntimeException("Not support!");
    }
}
Also used : VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) BigDecimal(java.math.BigDecimal)
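
The byte[] values that value2/value3 produce for VARBINARY columns end up stored in BinaryRowData rows like the ones sorted in the next example. Here is a hedged, standalone sketch of writing such a value with BinaryRowWriter; it assumes the usual writeBinary/complete sequence of the writer API and is not taken from the test.

import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.data.writer.BinaryRowWriter;

public class VarBinaryRowSketch {
    public static void main(String[] args) {
        byte[] payload = new byte[] { 1, 2, 3 };
        // One-field row; field 0 holds the VARBINARY payload.
        BinaryRowData row = new BinaryRowData(1);
        BinaryRowWriter writer = new BinaryRowWriter(row);
        writer.writeBinary(0, payload);
        writer.complete();
        // getBinary(0) returns the stored bytes.
        System.out.println(row.getBinary(0).length);
    }
}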

Example 9 with VarBinaryType

Use of org.apache.flink.table.types.logical.VarBinaryType in project flink by apache.

From the class SortCodeGeneratorTest, method testInner():

private void testInner() throws Exception {
    List<MemorySegment> segments = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
        segments.add(MemorySegmentFactory.wrap(new byte[32768]));
    }
    Tuple2<NormalizedKeyComputer, RecordComparator> tuple2 = getSortBaseWithNulls(this.getClass().getSimpleName(), inputType, sortSpec);
    BinaryRowDataSerializer serializer = new BinaryRowDataSerializer(inputType.getFieldCount());
    BinaryInMemorySortBuffer sortBuffer = BinaryInMemorySortBuffer.createBuffer(tuple2.f0, (AbstractRowDataSerializer) serializer, serializer, tuple2.f1, new ListMemorySegmentPool(segments));
    BinaryRowData[] dataArray = getTestData();
    List<BinaryRowData> data = Arrays.asList(dataArray.clone());
    List<BinaryRowData> binaryRows = Arrays.asList(dataArray.clone());
    Collections.shuffle(binaryRows);
    for (BinaryRowData row : binaryRows) {
        if (!sortBuffer.write(row)) {
            throw new RuntimeException();
        }
    }
    new QuickSort().sort(sortBuffer);
    MutableObjectIterator<BinaryRowData> iter = sortBuffer.getIterator();
    List<BinaryRowData> result = new ArrayList<>();
    BinaryRowData row = serializer.createInstance();
    while ((row = iter.next(row)) != null) {
        result.add(row.copy());
    }
    int[] keys = sortSpec.getFieldIndices();
    LogicalType[] keyTypes = sortSpec.getFieldTypes(inputType);
    boolean[] orders = sortSpec.getAscendingOrders();
    data.sort((o1, o2) -> {
        for (int i = 0; i < keys.length; i++) {
            LogicalType t = inputType.getTypeAt(keys[i]);
            boolean order = orders[i];
            Object first = null;
            Object second = null;
            RowData.FieldGetter fieldGetter = RowData.createFieldGetter(keyTypes[i], keys[i]);
            if (!o1.isNullAt(keys[i])) {
                first = fieldGetter.getFieldOrNull(o1);
            }
            if (!o2.isNullAt(keys[i])) {
                second = fieldGetter.getFieldOrNull(o2);
            }
            if (first != null || second != null) {
                if (first == null) {
                    return order ? -1 : 1;
                }
                if (second == null) {
                    return order ? 1 : -1;
                }
                if (first instanceof Comparable) {
                    int ret = ((Comparable) first).compareTo(second);
                    if (ret != 0) {
                        return order ? ret : -ret;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.ARRAY) {
                    BinaryArrayData leftArray = (BinaryArrayData) first;
                    BinaryArrayData rightArray = (BinaryArrayData) second;
                    int minLength = Math.min(leftArray.size(), rightArray.size());
                    for (int j = 0; j < minLength; j++) {
                        boolean isNullLeft = leftArray.isNullAt(j);
                        boolean isNullRight = rightArray.isNullAt(j);
                        if (isNullLeft && isNullRight) {
                        // Do nothing.
                        } else if (isNullLeft) {
                            return order ? -1 : 1;
                        } else if (isNullRight) {
                            return order ? 1 : -1;
                        } else {
                            int comp = Byte.compare(leftArray.getByte(j), rightArray.getByte(j));
                            if (comp != 0) {
                                return order ? comp : -comp;
                            }
                        }
                    }
                    if (leftArray.size() < rightArray.size()) {
                        return order ? -1 : 1;
                    } else if (leftArray.size() > rightArray.size()) {
                        return order ? 1 : -1;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.VARBINARY) {
                    int comp = org.apache.flink.table.runtime.operators.sort.SortUtil.compareBinary((byte[]) first, (byte[]) second);
                    if (comp != 0) {
                        return order ? comp : -comp;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.ROW) {
                    RowType rowType = (RowType) t;
                    int comp;
                    if (rowType.getFields().get(0).getType() instanceof IntType) {
                        comp = INT_ROW_COMP.compare(INT_ROW_CONV.toExternal(first), INT_ROW_CONV.toExternal(second));
                    } else {
                        comp = NEST_ROW_COMP.compare(NEST_ROW_CONV.toExternal(first), NEST_ROW_CONV.toExternal(second));
                    }
                    if (comp != 0) {
                        return order ? comp : -comp;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.RAW) {
                    Integer i1 = ((RawValueData<Integer>) first).toObject(IntSerializer.INSTANCE);
                    Integer i2 = ((RawValueData<Integer>) second).toObject(IntSerializer.INSTANCE);
                    int comp = Integer.compare(i1, i2);
                    if (comp != 0) {
                        return order ? comp : -comp;
                    }
                } else {
                    throw new RuntimeException();
                }
            }
        }
        return 0;
    });
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < data.size(); i++) {
        builder.append("\n").append("expect: ").append(DataFormatTestUtil.rowDataToString(data.get(i), inputType)).append("; actual: ").append(DataFormatTestUtil.rowDataToString(result.get(i), inputType));
    }
    builder.append("\n").append("types: ").append(Arrays.asList(inputType.getChildren()));
    builder.append("\n").append("keys: ").append(Arrays.toString(keys));
    String msg = builder.toString();
    for (int i = 0; i < data.size(); i++) {
        for (int j = 0; j < keys.length; j++) {
            boolean isNull1 = data.get(i).isNullAt(keys[j]);
            boolean isNull2 = result.get(i).isNullAt(keys[j]);
            Assert.assertEquals(msg, isNull1, isNull2);
            if (!isNull1 || !isNull2) {
                RowData.FieldGetter fieldGetter = RowData.createFieldGetter(keyTypes[j], keys[j]);
                Object o1 = fieldGetter.getFieldOrNull(data.get(i));
                Object o2 = fieldGetter.getFieldOrNull(result.get(i));
                if (keyTypes[j] instanceof VarBinaryType) {
                    Assert.assertArrayEquals(msg, (byte[]) o1, (byte[]) o2);
                } else if (keyTypes[j] instanceof TypeInformationRawType) {
                    assertThat(msg, (RawValueData) o1, equivalent((RawValueData) o2, new RawValueDataSerializer<>(IntSerializer.INSTANCE)));
                } else {
                    Assert.assertEquals(msg, o1, o2);
                }
            }
        }
    }
}
Also used : VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) ListMemorySegmentPool(org.apache.flink.table.runtime.operators.sort.ListMemorySegmentPool) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) TypeInformationRawType(org.apache.flink.table.types.logical.TypeInformationRawType) NormalizedKeyComputer(org.apache.flink.table.runtime.generated.NormalizedKeyComputer) GeneratedNormalizedKeyComputer(org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer) RawValueData(org.apache.flink.table.data.RawValueData) BinaryArrayData(org.apache.flink.table.data.binary.BinaryArrayData) MemorySegment(org.apache.flink.core.memory.MemorySegment) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) QuickSort(org.apache.flink.runtime.operators.sort.QuickSort) BinaryInMemorySortBuffer(org.apache.flink.table.runtime.operators.sort.BinaryInMemorySortBuffer) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)
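
For VARBINARY keys the hand-written comparator above delegates to SortUtil.compareBinary. A short standalone sketch of that call in isolation follows; the expectation that bytes are ordered as unsigned values is an assumption about SortUtil, not something asserted in the test.

import org.apache.flink.table.runtime.operators.sort.SortUtil;

public class CompareBinarySketch {
    public static void main(String[] args) {
        byte[] a = new byte[] { 0x01, (byte) 0xFF };
        byte[] b = new byte[] { 0x01, 0x02 };
        // Prints the sign of the comparison; assuming unsigned lexicographic
        // ordering of the bytes, the result is positive.
        System.out.println(Integer.signum(SortUtil.compareBinary(a, b)));
    }
}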

Aggregations

VarBinaryType (org.apache.flink.table.types.logical.VarBinaryType): 9
DecimalType (org.apache.flink.table.types.logical.DecimalType): 7
RowType (org.apache.flink.table.types.logical.RowType): 6
BigIntType (org.apache.flink.table.types.logical.BigIntType): 5
IntType (org.apache.flink.table.types.logical.IntType): 5
ArrayList (java.util.ArrayList): 4
SmallIntType (org.apache.flink.table.types.logical.SmallIntType): 4
TimestampType (org.apache.flink.table.types.logical.TimestampType): 4
TinyIntType (org.apache.flink.table.types.logical.TinyIntType): 4
VarCharType (org.apache.flink.table.types.logical.VarCharType): 4
BigDecimal (java.math.BigDecimal): 3
ArrayType (org.apache.flink.table.types.logical.ArrayType): 3
BooleanType (org.apache.flink.table.types.logical.BooleanType): 3
DateType (org.apache.flink.table.types.logical.DateType): 3
DoubleType (org.apache.flink.table.types.logical.DoubleType): 3
FloatType (org.apache.flink.table.types.logical.FloatType): 3
LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType): 3
LogicalType (org.apache.flink.table.types.logical.LogicalType): 3
TimeType (org.apache.flink.table.types.logical.TimeType): 3
LocalDateTime (java.time.LocalDateTime): 2