Search in sources:

Example 1 with RecordComparator

Use of org.apache.flink.table.runtime.generated.RecordComparator in the Apache Flink project.

Class SortOperator, method open:

/**
 * Opens the operator: builds the serializers, instantiates the generated key computer and
 * record comparator, creates and starts the external sorter, wraps the output in a collector
 * and registers the sorter's gauges.
 *
 * @throws Exception if {@code super.open()} or any of the set-up steps fails
 */
@Override
public void open() throws Exception {
    super.open();
    LOG.info("Opening SortOperator");

    final ClassLoader userClassLoader = getContainingTask().getUserCodeClassLoader();

    // The input serializer comes from the operator config; the binary serializer is sized
    // from the input serializer's arity.
    final AbstractRowDataSerializer rowSerializer =
            (AbstractRowDataSerializer)
                    getOperatorConfig().getTypeSerializerIn1(getUserCodeClassloader());
    binarySerializer = new BinaryRowDataSerializer(rowSerializer.getArity());

    // Instantiate the generated classes, then drop the references to their generators
    // (presumably so the generated-code holders can be reclaimed - TODO confirm).
    final NormalizedKeyComputer keyComputer = gComputer.newInstance(userClassLoader);
    final RecordComparator recordComparator = gComparator.newInstance(userClassLoader);
    gComputer = null;
    gComparator = null;

    final MemoryManager memoryManager =
            getContainingTask().getEnvironment().getMemoryManager();
    sorter =
            new BinaryExternalSorter(
                    getContainingTask(),
                    memoryManager,
                    computeMemorySize(),
                    getContainingTask().getEnvironment().getIOManager(),
                    rowSerializer,
                    binarySerializer,
                    keyComputer,
                    recordComparator,
                    getContainingTask().getJobConfiguration());
    sorter.startThreads();

    this.collector = new StreamRecordCollector<>(output);

    // Expose the sorter's memory and spill figures as gauges.
    getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) sorter::getUsedMemoryInBytes);
    getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) sorter::getNumSpillFiles);
    getMetricGroup().gauge("spillInBytes", (Gauge<Long>) sorter::getSpillInBytes);
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) NormalizedKeyComputer(org.apache.flink.table.runtime.generated.NormalizedKeyComputer) GeneratedNormalizedKeyComputer(org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator)

Example 2 with RecordComparator

Use of org.apache.flink.table.runtime.generated.RecordComparator in the Apache Flink project.

Class SortCodeGeneratorTest, method testInner:

/**
 * Sorts a shuffled copy of the test rows with a {@link BinaryInMemorySortBuffer} driven by the
 * generated key computer and record comparator, sorts another copy with a hand-written
 * reference comparator, and asserts that both orderings agree on every sort-key field.
 *
 * @throws Exception if building the sort buffer, writing rows or iterating the result fails
 */
private void testInner() throws Exception {
    List<MemorySegment> segments = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
        segments.add(MemorySegmentFactory.wrap(new byte[32768]));
    }
    Tuple2<NormalizedKeyComputer, RecordComparator> tuple2 = getSortBaseWithNulls(this.getClass().getSimpleName(), inputType, sortSpec);
    BinaryRowDataSerializer serializer = new BinaryRowDataSerializer(inputType.getFieldCount());
    BinaryInMemorySortBuffer sortBuffer = BinaryInMemorySortBuffer.createBuffer(tuple2.f0, (AbstractRowDataSerializer) serializer, serializer, tuple2.f1, new ListMemorySegmentPool(segments));
    BinaryRowData[] dataArray = getTestData();
    // Two independent views over clones of the array: one is sorted by the reference
    // comparator, the other is shuffled and fed to the sort buffer.
    List<BinaryRowData> data = Arrays.asList(dataArray.clone());
    List<BinaryRowData> binaryRows = Arrays.asList(dataArray.clone());
    Collections.shuffle(binaryRows);
    for (BinaryRowData row : binaryRows) {
        if (!sortBuffer.write(row)) {
            throw new RuntimeException("Sort buffer rejected a row; the test memory segments are too small for the test data.");
        }
    }
    new QuickSort().sort(sortBuffer);
    MutableObjectIterator<BinaryRowData> iter = sortBuffer.getIterator();
    List<BinaryRowData> result = new ArrayList<>();
    BinaryRowData row = serializer.createInstance();
    while ((row = iter.next(row)) != null) {
        // The iterator is handed the same instance again on each call, so keep a copy.
        result.add(row.copy());
    }
    int[] keys = sortSpec.getFieldIndices();
    LogicalType[] keyTypes = sortSpec.getFieldTypes(inputType);
    boolean[] orders = sortSpec.getAscendingOrders();
    // Field getters depend only on the key type and position, so build them once instead of
    // on every comparator invocation and every verification step.
    RowData.FieldGetter[] fieldGetters = new RowData.FieldGetter[keys.length];
    for (int i = 0; i < keys.length; i++) {
        fieldGetters[i] = RowData.createFieldGetter(keyTypes[i], keys[i]);
    }
    // Reference ordering: compare key by key; a null sorts before a non-null value when the
    // key's order flag is true and after it otherwise.
    data.sort((o1, o2) -> {
        for (int i = 0; i < keys.length; i++) {
            LogicalType t = inputType.getTypeAt(keys[i]);
            boolean order = orders[i];
            Object first = null;
            Object second = null;
            if (!o1.isNullAt(keys[i])) {
                first = fieldGetters[i].getFieldOrNull(o1);
            }
            if (!o2.isNullAt(keys[i])) {
                second = fieldGetters[i].getFieldOrNull(o2);
            }
            if (first != null || second != null) {
                if (first == null) {
                    return order ? -1 : 1;
                }
                if (second == null) {
                    return order ? 1 : -1;
                }
                if (first instanceof Comparable) {
                    int ret = ((Comparable) first).compareTo(second);
                    if (ret != 0) {
                        return order ? ret : -ret;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.ARRAY) {
                    // Element-wise comparison, then by length. Elements are read with
                    // getByte, so this assumes byte-sized array elements - TODO confirm
                    // against the test's type list.
                    BinaryArrayData leftArray = (BinaryArrayData) first;
                    BinaryArrayData rightArray = (BinaryArrayData) second;
                    int minLength = Math.min(leftArray.size(), rightArray.size());
                    for (int j = 0; j < minLength; j++) {
                        boolean isNullLeft = leftArray.isNullAt(j);
                        boolean isNullRight = rightArray.isNullAt(j);
                        if (isNullLeft && isNullRight) {
                            // Both elements are null: equal at this position, keep going.
                            continue;
                        }
                        if (isNullLeft) {
                            return order ? -1 : 1;
                        }
                        if (isNullRight) {
                            return order ? 1 : -1;
                        }
                        int comp = Byte.compare(leftArray.getByte(j), rightArray.getByte(j));
                        if (comp != 0) {
                            return order ? comp : -comp;
                        }
                    }
                    if (leftArray.size() < rightArray.size()) {
                        return order ? -1 : 1;
                    } else if (leftArray.size() > rightArray.size()) {
                        return order ? 1 : -1;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.VARBINARY) {
                    int comp = org.apache.flink.table.runtime.operators.sort.SortUtil.compareBinary((byte[]) first, (byte[]) second);
                    if (comp != 0) {
                        return order ? comp : -comp;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.ROW) {
                    RowType rowType = (RowType) t;
                    int comp;
                    // The type of the row's first field selects which converter/comparator
                    // pair is used.
                    if (rowType.getFields().get(0).getType() instanceof IntType) {
                        comp = INT_ROW_COMP.compare(INT_ROW_CONV.toExternal(first), INT_ROW_CONV.toExternal(second));
                    } else {
                        comp = NEST_ROW_COMP.compare(NEST_ROW_CONV.toExternal(first), NEST_ROW_CONV.toExternal(second));
                    }
                    if (comp != 0) {
                        return order ? comp : -comp;
                    }
                } else if (t.getTypeRoot() == LogicalTypeRoot.RAW) {
                    Integer i1 = ((RawValueData<Integer>) first).toObject(IntSerializer.INSTANCE);
                    Integer i2 = ((RawValueData<Integer>) second).toObject(IntSerializer.INSTANCE);
                    int comp = Integer.compare(i1, i2);
                    if (comp != 0) {
                        return order ? comp : -comp;
                    }
                } else {
                    throw new RuntimeException("Reference comparator does not support sort key type: " + t);
                }
            }
        }
        return 0;
    });
    // Fail with a readable assertion rather than an IndexOutOfBoundsException if the sort
    // buffer returned a different number of rows than were written.
    Assert.assertEquals("Sorted output has a different number of rows than the input", data.size(), result.size());
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < data.size(); i++) {
        builder.append("\n").append("expect: ").append(DataFormatTestUtil.rowDataToString(data.get(i), inputType)).append("; actual: ").append(DataFormatTestUtil.rowDataToString(result.get(i), inputType));
    }
    builder.append("\n").append("types: ").append(Arrays.asList(inputType.getChildren()));
    builder.append("\n").append("keys: ").append(Arrays.toString(keys));
    String msg = builder.toString();
    for (int i = 0; i < data.size(); i++) {
        for (int j = 0; j < keys.length; j++) {
            boolean isNull1 = data.get(i).isNullAt(keys[j]);
            boolean isNull2 = result.get(i).isNullAt(keys[j]);
            Assert.assertEquals(msg, isNull1, isNull2);
            // The assertion above guarantees both flags are equal, so checking one is enough.
            if (!isNull1) {
                Object o1 = fieldGetters[j].getFieldOrNull(data.get(i));
                Object o2 = fieldGetters[j].getFieldOrNull(result.get(i));
                if (keyTypes[j] instanceof VarBinaryType) {
                    Assert.assertArrayEquals(msg, (byte[]) o1, (byte[]) o2);
                } else if (keyTypes[j] instanceof TypeInformationRawType) {
                    assertThat(msg, (RawValueData) o1, equivalent((RawValueData) o2, new RawValueDataSerializer<>(IntSerializer.INSTANCE)));
                } else {
                    Assert.assertEquals(msg, o1, o2);
                }
            }
        }
    }
}
Also used : VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) ListMemorySegmentPool(org.apache.flink.table.runtime.operators.sort.ListMemorySegmentPool) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) TypeInformationRawType(org.apache.flink.table.types.logical.TypeInformationRawType) NormalizedKeyComputer(org.apache.flink.table.runtime.generated.NormalizedKeyComputer) GeneratedNormalizedKeyComputer(org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer) RawValueData(org.apache.flink.table.data.RawValueData) BinaryArrayData(org.apache.flink.table.data.binary.BinaryArrayData) MemorySegment(org.apache.flink.core.memory.MemorySegment) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) QuickSort(org.apache.flink.runtime.operators.sort.QuickSort) BinaryInMemorySortBuffer(org.apache.flink.table.runtime.operators.sort.BinaryInMemorySortBuffer) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Example 3 with RecordComparator

Use of org.apache.flink.table.runtime.generated.RecordComparator in the Apache Flink project.

Class CodeSplitTest, method testRecordComparator:

/**
 * Generates a {@link RecordComparator} over a 600-field int row that sorts on every field and
 * checks that it returns the expected result for two rows that are identical except for at
 * most one randomly chosen field.
 *
 * <p>The random seed is chosen explicitly and included in the assertion message, so a failing
 * run can be replayed by hard-coding the reported seed.
 */
@Test
public void testRecordComparator() {
    int numFields = 600;
    RowType rowType = getIntRowType(numFields);
    SortSpec.SortSpecBuilder builder = SortSpec.builder();
    for (int i = 0; i < numFields; i++) {
        builder.addField(i, true, true);
    }
    SortSpec sortSpec = builder.build();
    GenericRowData rowData1 = new GenericRowData(numFields);
    GenericRowData rowData2 = new GenericRowData(numFields);
    long seed = System.nanoTime();
    Random random = new Random(seed);
    // Start from two identical rows whose values all lie in [0, 100).
    for (int i = 0; i < numFields; i++) {
        int x = random.nextInt(100);
        rowData1.setField(i, x);
        rowData2.setField(i, x);
    }
    // Pick the expected outcome (-1, 0 or 1) and, unless it is 0, overwrite one field of the
    // first row with a value below (-1) or above (100) every generated value.
    int result = random.nextInt(3) - 1;
    if (result == -1) {
        rowData1.setField(random.nextInt(numFields), -1);
    } else if (result == 1) {
        rowData1.setField(random.nextInt(numFields), 100);
    }
    String failureMessage = "Unexpected comparison result (random seed: " + seed + ")";
    Consumer<TableConfig> consumer = tableConfig -> {
        RecordComparator instance = ComparatorCodeGenerator.gen(tableConfig, "", rowType, sortSpec).newInstance(classLoader);
        // Compare repeatedly with the same instance; every call must give the same answer.
        for (int i = 0; i < 100; i++) {
            Assert.assertEquals(failureMessage, result, instance.compare(rowData1, rowData2));
        }
    };
    runTest(consumer);
}
Also used : Arrays(java.util.Arrays) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) IntType(org.apache.flink.table.types.logical.IntType) Random(java.util.Random) FlinkTypeFactory(org.apache.flink.table.planner.calcite.FlinkTypeFactory) RowType(org.apache.flink.table.types.logical.RowType) ArrayList(java.util.ArrayList) HashFunction(org.apache.flink.table.runtime.generated.HashFunction) TableConfigOptions(org.apache.flink.table.api.config.TableConfigOptions) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) GenericRowData(org.apache.flink.table.data.GenericRowData) RexNode(org.apache.calcite.rex.RexNode) OutputStream(java.io.OutputStream) PrintStream(java.io.PrintStream) RelDataType(org.apache.calcite.rel.type.RelDataType) TableConfig(org.apache.flink.table.api.TableConfig) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) RexBuilder(org.apache.calcite.rex.RexBuilder) Test(org.junit.Test) IOException(java.io.IOException) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) ComparatorCodeGenerator(org.apache.flink.table.planner.codegen.sort.ComparatorCodeGenerator) RexInputRef(org.apache.calcite.rex.RexInputRef) Consumer(java.util.function.Consumer) JoinUtil(org.apache.flink.table.planner.plan.utils.JoinUtil) JoinCondition(org.apache.flink.table.runtime.generated.JoinCondition) List(java.util.List) MatcherAssert(org.hamcrest.MatcherAssert) LogicalType(org.apache.flink.table.types.logical.LogicalType) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) Assert(org.junit.Assert) Collections(java.util.Collections) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec) Projection(org.apache.flink.table.runtime.generated.Projection) Random(java.util.Random) RowType(org.apache.flink.table.types.logical.RowType) GenericRowData(org.apache.flink.table.data.GenericRowData) TableConfig(org.apache.flink.table.api.TableConfig) 
SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) Test(org.junit.Test)

Aggregations

RecordComparator (org.apache.flink.table.runtime.generated.RecordComparator)3 ArrayList (java.util.ArrayList)2 GenericRowData (org.apache.flink.table.data.GenericRowData)2 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)2 GeneratedNormalizedKeyComputer (org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer)2 GeneratedRecordComparator (org.apache.flink.table.runtime.generated.GeneratedRecordComparator)2 NormalizedKeyComputer (org.apache.flink.table.runtime.generated.NormalizedKeyComputer)2 IntType (org.apache.flink.table.types.logical.IntType)2 LogicalType (org.apache.flink.table.types.logical.LogicalType)2 RowType (org.apache.flink.table.types.logical.RowType)2 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 PrintStream (java.io.PrintStream)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Random (java.util.Random)1 Consumer (java.util.function.Consumer)1 RelDataType (org.apache.calcite.rel.type.RelDataType)1 RexBuilder (org.apache.calcite.rex.RexBuilder)1