Search in sources:

Example 1 with NormalizedKeyComputer

Use of org.apache.flink.table.runtime.generated.NormalizedKeyComputer in the Apache Flink project.

From the class SortOperator, method open:

/**
 * Prepares this operator for processing: instantiates the generated key computer and record
 * comparator, builds and starts the {@code BinaryExternalSorter}, creates the output collector
 * and registers the sorter's gauges on the operator metric group.
 *
 * @throws Exception if {@code super.open()} or any of the set-up steps below fails
 */
@Override
public void open() throws Exception {
    super.open();
    LOG.info("Opening SortOperator");

    ClassLoader userClassLoader = getContainingTask().getUserCodeClassLoader();

    // Serializer of the first input as configured for this operator; its arity sizes the
    // binary row serializer handed to the sorter.
    AbstractRowDataSerializer rowSerializer =
            (AbstractRowDataSerializer)
                    getOperatorConfig().getTypeSerializerIn1(getUserCodeClassloader());
    binarySerializer = new BinaryRowDataSerializer(rowSerializer.getArity());

    // Instantiate both generated classes first, then drop the references to the generators.
    // NOTE(review): presumably cleared so the generated-code holders can be collected - confirm.
    NormalizedKeyComputer keyComputer = gComputer.newInstance(userClassLoader);
    RecordComparator recordComparator = gComparator.newInstance(userClassLoader);
    gComputer = null;
    gComparator = null;

    MemoryManager memoryManager = getContainingTask().getEnvironment().getMemoryManager();
    sorter =
            new BinaryExternalSorter(
                    getContainingTask(),
                    memoryManager,
                    computeMemorySize(),
                    getContainingTask().getEnvironment().getIOManager(),
                    rowSerializer,
                    binarySerializer,
                    keyComputer,
                    recordComparator,
                    getContainingTask().getJobConfiguration());
    sorter.startThreads();

    collector = new StreamRecordCollector<>(output);

    // Expose the sorter's memory and spill statistics as gauges.
    getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) sorter::getUsedMemoryInBytes);
    getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) sorter::getNumSpillFiles);
    getMetricGroup().gauge("spillInBytes", (Gauge<Long>) sorter::getSpillInBytes);
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) NormalizedKeyComputer(org.apache.flink.table.runtime.generated.NormalizedKeyComputer) GeneratedNormalizedKeyComputer(org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator)

Example 2 with NormalizedKeyComputer

Use of org.apache.flink.table.runtime.generated.NormalizedKeyComputer in the Apache Flink project.

From the class SortCodeGeneratorTest, method testInner:

/**
 * Sorts a shuffled copy of the test data with a {@code BinaryInMemorySortBuffer} driven by the
 * generated {@code NormalizedKeyComputer} / {@code RecordComparator}, sorts a second copy with
 * a hand-written reference comparator, and asserts that both orderings agree on every sort key.
 *
 * @throws Exception if generating the sort classes, writing to the buffer or iterating the
 *     sorted buffer fails
 */
private void testInner() throws Exception {
    List<MemorySegment> segments = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
        segments.add(MemorySegmentFactory.wrap(new byte[32768]));
    }
    Tuple2<NormalizedKeyComputer, RecordComparator> tuple2 = getSortBaseWithNulls(this.getClass().getSimpleName(), inputType, sortSpec);
    BinaryRowDataSerializer serializer = new BinaryRowDataSerializer(inputType.getFieldCount());
    BinaryInMemorySortBuffer sortBuffer = BinaryInMemorySortBuffer.createBuffer(tuple2.f0, (AbstractRowDataSerializer) serializer, serializer, tuple2.f1, new ListMemorySegmentPool(segments));

    // Arrays.asList is a view over its array, so each list gets its own clone: shuffling the
    // rows fed to the sorter must not disturb the list that becomes the expected ordering.
    BinaryRowData[] dataArray = getTestData();
    List<BinaryRowData> data = Arrays.asList(dataArray.clone());
    List<BinaryRowData> binaryRows = Arrays.asList(dataArray.clone());
    Collections.shuffle(binaryRows);
    for (BinaryRowData row : binaryRows) {
        if (!sortBuffer.write(row)) {
            throw new RuntimeException("BinaryInMemorySortBuffer rejected a test row (write returned false)");
        }
    }

    // Actual ordering: sort the buffer and copy out every row, since the iterator is handed
    // the same reuse instance on each call.
    new QuickSort().sort(sortBuffer);
    MutableObjectIterator<BinaryRowData> iter = sortBuffer.getIterator();
    List<BinaryRowData> result = new ArrayList<>();
    BinaryRowData row = serializer.createInstance();
    while ((row = iter.next(row)) != null) {
        result.add(row.copy());
    }

    int[] keys = sortSpec.getFieldIndices();
    LogicalType[] keyTypes = sortSpec.getFieldTypes(inputType);
    boolean[] orders = sortSpec.getAscendingOrders();

    // Build one field getter per sort key up front instead of once per comparison.
    RowData.FieldGetter[] keyGetters = new RowData.FieldGetter[keys.length];
    for (int i = 0; i < keys.length; i++) {
        keyGetters[i] = RowData.createFieldGetter(keyTypes[i], keys[i]);
    }

    // Expected ordering: compare key by key, the first non-equal key decides; a false entry
    // in orders flips the sign of that key's result.
    data.sort((o1, o2) -> {
        for (int i = 0; i < keys.length; i++) {
            Object first = o1.isNullAt(keys[i]) ? null : keyGetters[i].getFieldOrNull(o1);
            Object second = o2.isNullAt(keys[i]) ? null : keyGetters[i].getFieldOrNull(o2);
            int comp = compareKeyValues(inputType.getTypeAt(keys[i]), first, second);
            if (comp != 0) {
                return orders[i] ? comp : -comp;
            }
        }
        return 0;
    });

    // Fail with a clear assertion instead of an IndexOutOfBoundsException below if the sorter
    // lost or duplicated rows.
    Assert.assertEquals("sorted output row count differs from input row count", data.size(), result.size());

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < data.size(); i++) {
        builder.append("\n").append("expect: ").append(DataFormatTestUtil.rowDataToString(data.get(i), inputType)).append("; actual: ").append(DataFormatTestUtil.rowDataToString(result.get(i), inputType));
    }
    builder.append("\n").append("types: ").append(Arrays.asList(inputType.getChildren()));
    builder.append("\n").append("keys: ").append(Arrays.toString(keys));
    String msg = builder.toString();

    // Only the key fields are compared: rows with equal keys may legitimately appear in a
    // different relative order in the two sorted lists.
    for (int i = 0; i < data.size(); i++) {
        for (int j = 0; j < keys.length; j++) {
            boolean isNull1 = data.get(i).isNullAt(keys[j]);
            boolean isNull2 = result.get(i).isNullAt(keys[j]);
            Assert.assertEquals(msg, isNull1, isNull2);
            // Past the assertion both flags are equal, so checking one of them is enough.
            if (!isNull1) {
                Object o1 = keyGetters[j].getFieldOrNull(data.get(i));
                Object o2 = keyGetters[j].getFieldOrNull(result.get(i));
                if (keyTypes[j] instanceof VarBinaryType) {
                    Assert.assertArrayEquals(msg, (byte[]) o1, (byte[]) o2);
                } else if (keyTypes[j] instanceof TypeInformationRawType) {
                    assertThat(msg, (RawValueData) o1, equivalent((RawValueData) o2, new RawValueDataSerializer<>(IntSerializer.INSTANCE)));
                } else {
                    Assert.assertEquals(msg, o1, o2);
                }
            }
        }
    }
}

/**
 * Reference comparison of two values of one sort key, expressed for ascending order (the
 * caller negates the result for a descending key).
 *
 * @param type logical type of the key field, used to pick the comparison for values that are
 *     not {@link Comparable}
 * @param first key value of the left row, or {@code null} if the field is null
 * @param second key value of the right row, or {@code null} if the field is null
 * @return a negative, zero or positive number; {@code null} orders before any non-null value
 * @throws RuntimeException if the values are not {@link Comparable} and the type root is none
 *     of ARRAY, VARBINARY, ROW or RAW
 */
private int compareKeyValues(LogicalType type, Object first, Object second) {
    if (first == null && second == null) {
        return 0;
    }
    if (first == null) {
        return -1;
    }
    if (second == null) {
        return 1;
    }
    if (first instanceof Comparable) {
        // Safe as long as both values come from the same key column and therefore share a type.
        @SuppressWarnings("unchecked")
        Comparable<Object> comparable = (Comparable<Object>) first;
        return comparable.compareTo(second);
    }
    LogicalTypeRoot root = type.getTypeRoot();
    if (root == LogicalTypeRoot.ARRAY) {
        return compareByteArrayData((BinaryArrayData) first, (BinaryArrayData) second);
    }
    if (root == LogicalTypeRoot.VARBINARY) {
        return org.apache.flink.table.runtime.operators.sort.SortUtil.compareBinary((byte[]) first, (byte[]) second);
    }
    if (root == LogicalTypeRoot.ROW) {
        // The type of the row's first field selects which converter/comparator pair applies.
        RowType rowType = (RowType) type;
        if (rowType.getFields().get(0).getType() instanceof IntType) {
            return INT_ROW_COMP.compare(INT_ROW_CONV.toExternal(first), INT_ROW_CONV.toExternal(second));
        }
        return NEST_ROW_COMP.compare(NEST_ROW_CONV.toExternal(first), NEST_ROW_CONV.toExternal(second));
    }
    if (root == LogicalTypeRoot.RAW) {
        // NOTE(review): assumes every RAW key in the test data wraps an Integer - confirm.
        @SuppressWarnings("unchecked")
        RawValueData<Integer> left = (RawValueData<Integer>) first;
        @SuppressWarnings("unchecked")
        RawValueData<Integer> right = (RawValueData<Integer>) second;
        Integer i1 = left.toObject(IntSerializer.INSTANCE);
        Integer i2 = right.toObject(IntSerializer.INSTANCE);
        return Integer.compare(i1, i2);
    }
    throw new RuntimeException("Unsupported sort key type: " + type);
}

/**
 * Compares two arrays element by element over their common prefix, then by length. A null
 * element orders before a non-null one; two null elements are treated as equal.
 *
 * <p>Elements are read with {@code getByte}, so this presumably only fits arrays of
 * single-byte elements - confirm against the test data.
 */
private static int compareByteArrayData(BinaryArrayData left, BinaryArrayData right) {
    int minLength = Math.min(left.size(), right.size());
    for (int j = 0; j < minLength; j++) {
        boolean isNullLeft = left.isNullAt(j);
        boolean isNullRight = right.isNullAt(j);
        if (isNullLeft && isNullRight) {
            continue;
        }
        if (isNullLeft) {
            return -1;
        }
        if (isNullRight) {
            return 1;
        }
        int comp = Byte.compare(left.getByte(j), right.getByte(j));
        if (comp != 0) {
            return comp;
        }
    }
    // Equal common prefix: the shorter array orders first.
    return Integer.compare(left.size(), right.size());
}
Also used : VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) ListMemorySegmentPool(org.apache.flink.table.runtime.operators.sort.ListMemorySegmentPool) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) TypeInformationRawType(org.apache.flink.table.types.logical.TypeInformationRawType) NormalizedKeyComputer(org.apache.flink.table.runtime.generated.NormalizedKeyComputer) GeneratedNormalizedKeyComputer(org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer) RawValueData(org.apache.flink.table.data.RawValueData) BinaryArrayData(org.apache.flink.table.data.binary.BinaryArrayData) MemorySegment(org.apache.flink.core.memory.MemorySegment) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) QuickSort(org.apache.flink.runtime.operators.sort.QuickSort) BinaryInMemorySortBuffer(org.apache.flink.table.runtime.operators.sort.BinaryInMemorySortBuffer) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Aggregations

GeneratedNormalizedKeyComputer (org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer)2 GeneratedRecordComparator (org.apache.flink.table.runtime.generated.GeneratedRecordComparator)2 NormalizedKeyComputer (org.apache.flink.table.runtime.generated.NormalizedKeyComputer)2 RecordComparator (org.apache.flink.table.runtime.generated.RecordComparator)2 BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)2 ArrayList (java.util.ArrayList)1 MemorySegment (org.apache.flink.core.memory.MemorySegment)1 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)1 QuickSort (org.apache.flink.runtime.operators.sort.QuickSort)1 GenericRowData (org.apache.flink.table.data.GenericRowData)1 RawValueData (org.apache.flink.table.data.RawValueData)1 RowData (org.apache.flink.table.data.RowData)1 BinaryArrayData (org.apache.flink.table.data.binary.BinaryArrayData)1 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)1 BinaryInMemorySortBuffer (org.apache.flink.table.runtime.operators.sort.BinaryInMemorySortBuffer)1 ListMemorySegmentPool (org.apache.flink.table.runtime.operators.sort.ListMemorySegmentPool)1 AbstractRowDataSerializer (org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer)1 BigIntType (org.apache.flink.table.types.logical.BigIntType)1 IntType (org.apache.flink.table.types.logical.IntType)1 LogicalType (org.apache.flink.table.types.logical.LogicalType)1