Search in sources :

Example 1 with BinaryExternalSorter

Use of org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter in the Apache Flink project.

Class SortMergeJoinOperator, method open:

/**
 * Initializes the operator before any input is processed.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>creates binary row serializers matching the arity of both input serializers;</li>
 *   <li>divides the value returned by {@code computeMemorySize()} between the external
 *       buffer(s) and the two sorters (a FULL join reserves two external buffers,
 *       every other join type reserves one);</li>
 *   <li>constructs both {@link BinaryExternalSorter}s and starts their threads;</li>
 *   <li>instantiates the key comparator, join condition and projections with the user
 *       code class loader, then clears the fields they were created from;</li>
 *   <li>registers the memory and spill gauges on the operator metric group.</li>
 * </ol>
 *
 * @throws TableException if the memory left for sorting is negative after the external
 *     buffer reservation
 * @throws Exception if the parent {@code open()} or any initialization step fails
 */
@Override
public void open() throws Exception {
    super.open();

    Configuration jobConf = getContainingTask().getJobConfiguration();
    // A freshly allocated boolean array is all-false: neither input has finished yet.
    isFinished = new boolean[2];
    collector = new StreamRecordCollector<>(output);
    ClassLoader userClassLoader = getUserCodeClassloader();

    // Binary serializers are sized from the arity of the configured input serializers.
    AbstractRowDataSerializer inputSerializer1 =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(userClassLoader);
    this.serializer1 = new BinaryRowDataSerializer(inputSerializer1.getArity());
    AbstractRowDataSerializer inputSerializer2 =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(userClassLoader);
    this.serializer2 = new BinaryRowDataSerializer(inputSerializer2.getArity());

    this.memManager = this.getContainingTask().getEnvironment().getMemoryManager();
    this.ioManager = this.getContainingTask().getEnvironment().getIOManager();

    // Memory split: external buffer share first (never below the buffer's minimum),
    // whatever remains is shared equally by the two sorters.
    long totalMemory = computeMemorySize();
    long ratioBasedBufferMemory = (long) (totalMemory * externalBufferMemRatio);
    externalBufferMemory = Math.max(ratioBasedBufferMemory, ResettableExternalBuffer.MIN_NUM_MEMORY);

    long reservedForBuffers = externalBufferMemory;
    if (type.equals(FlinkJoinType.FULL)) {
        // FULL join reserves the external buffer memory twice.
        reservedForBuffers = externalBufferMemory * 2;
    }
    long totalSortMem = totalMemory - reservedForBuffers;
    if (totalSortMem < 0) {
        throw new TableException("Memory size is too small: " + totalMemory + ", please increase manage memory of task manager.");
    }
    long memoryPerSorter = totalSortMem / 2;

    // Sorter for the first input.
    this.sorter1 = new BinaryExternalSorter(
            this.getContainingTask(),
            memManager,
            memoryPerSorter,
            ioManager,
            inputSerializer1,
            serializer1,
            computer1.newInstance(userClassLoader),
            comparator1.newInstance(userClassLoader),
            jobConf);
    this.sorter1.startThreads();

    // Sorter for the second input.
    this.sorter2 = new BinaryExternalSorter(
            this.getContainingTask(),
            memManager,
            memoryPerSorter,
            ioManager,
            inputSerializer2,
            serializer2,
            computer2.newInstance(userClassLoader),
            comparator2.newInstance(userClassLoader),
            jobConf);
    this.sorter2.startThreads();

    // Instantiate the remaining helpers with the user code class loader.
    keyComparator = genKeyComparator.newInstance(userClassLoader);
    this.condFunc = condFuncCode.newInstance(userClassLoader);
    condFunc.setRuntimeContext(getRuntimeContext());
    condFunc.open(new Configuration());
    projection1 = projectionCode1.newInstance(userClassLoader);
    projection2 = projectionCode2.newInstance(userClassLoader);

    // Reusable rows used while emitting join results.
    this.leftNullRow = new GenericRowData(serializer1.getArity());
    this.rightNullRow = new GenericRowData(serializer2.getArity());
    this.joinedRow = new JoinedRowData();

    // Every factory field has produced its instance above; clear the references
    // (presumably so the factories can be garbage collected -- confirm).
    computer1 = null;
    computer2 = null;
    comparator1 = null;
    comparator2 = null;
    genKeyComparator = null;
    condFuncCode = null;
    projectionCode1 = null;
    projectionCode2 = null;

    // Each gauge reports the sum over both sorters.
    getMetricGroup().gauge(
            "memoryUsedSizeInBytes",
            (Gauge<Long>) () -> sorter1.getUsedMemoryInBytes() + sorter2.getUsedMemoryInBytes());
    getMetricGroup().gauge(
            "numSpillFiles",
            (Gauge<Long>) () -> sorter1.getNumSpillFiles() + sorter2.getNumSpillFiles());
    getMetricGroup().gauge(
            "spillInBytes",
            (Gauge<Long>) () -> sorter1.getSpillInBytes() + sorter2.getSpillInBytes());
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) TableException(org.apache.flink.table.api.TableException) Configuration(org.apache.flink.configuration.Configuration) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryExternalSorter(org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter) GenericRowData(org.apache.flink.table.data.GenericRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Aggregations

Configuration (org.apache.flink.configuration.Configuration)1 TableException (org.apache.flink.table.api.TableException)1 GenericRowData (org.apache.flink.table.data.GenericRowData)1 JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData)1 BinaryExternalSorter (org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter)1 AbstractRowDataSerializer (org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer)1 BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)1