Search in sources:

Example 1 with AbstractRowDataSerializer

use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.

The following example is from the class SortOperator, method open:

@Override
public void open() throws Exception {
    super.open();
    LOG.info("Opening SortOperator");

    // Resolve the user-code class loader once and reuse it everywhere below
    // (the original re-fetched it via getUserCodeClassloader() for the
    // serializer lookup, which was redundant).
    ClassLoader cl = getContainingTask().getUserCodeClassLoader();

    AbstractRowDataSerializer inputSerializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    this.binarySerializer = new BinaryRowDataSerializer(inputSerializer.getArity());

    // Instantiate the code-generated sort helpers, then null out the holders
    // so the generated code objects can be garbage collected.
    NormalizedKeyComputer computer = gComputer.newInstance(cl);
    RecordComparator comparator = gComparator.newInstance(cl);
    gComputer = null;
    gComparator = null;

    MemoryManager memManager = getContainingTask().getEnvironment().getMemoryManager();
    this.sorter =
            new BinaryExternalSorter(
                    this.getContainingTask(),
                    memManager,
                    computeMemorySize(),
                    this.getContainingTask().getEnvironment().getIOManager(),
                    inputSerializer,
                    binarySerializer,
                    computer,
                    comparator,
                    getContainingTask().getJobConfiguration());
    this.sorter.startThreads();

    collector = new StreamRecordCollector<>(output);

    // Register the sorter metrics.
    getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) sorter::getUsedMemoryInBytes);
    getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) sorter::getNumSpillFiles);
    getMetricGroup().gauge("spillInBytes", (Gauge<Long>) sorter::getSpillInBytes);
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) NormalizedKeyComputer(org.apache.flink.table.runtime.generated.NormalizedKeyComputer) GeneratedNormalizedKeyComputer(org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) RecordComparator(org.apache.flink.table.runtime.generated.RecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator)

Example 2 with AbstractRowDataSerializer

use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.

The following example is from the class SortMergeJoinOperator, method open:

@Override
public void open() throws Exception {
    super.open();
    Configuration jobConf = getContainingTask().getJobConfiguration();
    isFinished = new boolean[] { false, false };
    collector = new StreamRecordCollector<>(output);
    ClassLoader classLoader = getUserCodeClassloader();

    // Input serializers for both sides, plus their binary-row counterparts.
    AbstractRowDataSerializer in1Serializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(classLoader);
    this.serializer1 = new BinaryRowDataSerializer(in1Serializer.getArity());
    AbstractRowDataSerializer in2Serializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(classLoader);
    this.serializer2 = new BinaryRowDataSerializer(in2Serializer.getArity());

    this.memManager = this.getContainingTask().getEnvironment().getMemoryManager();
    this.ioManager = this.getContainingTask().getEnvironment().getIOManager();

    // Split managed memory between the external buffer(s) and the two sorters.
    // FULL joins buffer both sides, so they reserve twice the buffer memory.
    long managedMemory = computeMemorySize();
    externalBufferMemory = (long) (managedMemory * externalBufferMemRatio);
    externalBufferMemory = Math.max(externalBufferMemory, ResettableExternalBuffer.MIN_NUM_MEMORY);
    long sortMemory =
            managedMemory
                    - (type.equals(FlinkJoinType.FULL)
                            ? externalBufferMemory * 2
                            : externalBufferMemory);
    if (sortMemory < 0) {
        throw new TableException("Memory size is too small: " + managedMemory + ", please increase manage memory of task manager.");
    }

    // Sorter for the first input.
    this.sorter1 =
            new BinaryExternalSorter(
                    this.getContainingTask(),
                    memManager,
                    sortMemory / 2,
                    ioManager,
                    in1Serializer,
                    serializer1,
                    computer1.newInstance(classLoader),
                    comparator1.newInstance(classLoader),
                    jobConf);
    this.sorter1.startThreads();

    // Sorter for the second input.
    this.sorter2 =
            new BinaryExternalSorter(
                    this.getContainingTask(),
                    memManager,
                    sortMemory / 2,
                    ioManager,
                    in2Serializer,
                    serializer2,
                    computer2.newInstance(classLoader),
                    comparator2.newInstance(classLoader),
                    jobConf);
    this.sorter2.startThreads();

    // Instantiate the code-generated join helpers.
    keyComparator = genKeyComparator.newInstance(classLoader);
    this.condFunc = condFuncCode.newInstance(classLoader);
    condFunc.setRuntimeContext(getRuntimeContext());
    condFunc.open(new Configuration());
    projection1 = projectionCode1.newInstance(classLoader);
    projection2 = projectionCode2.newInstance(classLoader);

    this.leftNullRow = new GenericRowData(serializer1.getArity());
    this.rightNullRow = new GenericRowData(serializer2.getArity());
    this.joinedRow = new JoinedRowData();

    // Drop references to generated code so it can be garbage collected.
    condFuncCode = null;
    computer1 = null;
    comparator1 = null;
    computer2 = null;
    comparator2 = null;
    projectionCode1 = null;
    projectionCode2 = null;
    genKeyComparator = null;

    // Metrics aggregate across both sorters.
    getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) () -> sorter1.getUsedMemoryInBytes() + sorter2.getUsedMemoryInBytes());
    getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) () -> sorter1.getNumSpillFiles() + sorter2.getNumSpillFiles());
    getMetricGroup().gauge("spillInBytes", (Gauge<Long>) () -> sorter1.getSpillInBytes() + sorter2.getSpillInBytes());
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) TableException(org.apache.flink.table.api.TableException) Configuration(org.apache.flink.configuration.Configuration) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryExternalSorter(org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter) GenericRowData(org.apache.flink.table.data.GenericRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Example 3 with AbstractRowDataSerializer

use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.

The following example is from the class StreamExecLocalWindowAggregate, method translateToPlanInternal:

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inputType = (RowType) edge.getOutputType();

    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner assigner = createSliceAssigner(windowing, shiftTimeZone);

    // Derive the aggregate specs for this window.
    final AggregateInfoList aggInfos =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    inputType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    false); // isStateBackendDataViews
    final GeneratedNamespaceAggsHandleFunction<Long> aggsHandler =
            createAggsHandler(
                    assigner,
                    aggInfos,
                    config,
                    planner.getRelBuilder(),
                    inputType.getChildren(),
                    shiftTimeZone);

    // Key selector plus key/value serializers feeding the window buffer.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputType));
    PagedTypeSerializer<RowData> keySerializer =
            (PagedTypeSerializer<RowData>) keySelector.getProducedType().toSerializer();
    AbstractRowDataSerializer<RowData> valueSerializer = new RowDataSerializer(inputType);
    WindowBuffer.LocalFactory bufferFactory =
            new RecordsWindowBuffer.LocalFactory(
                    keySerializer, valueSerializer, new LocalAggCombiner.Factory(aggsHandler));

    final OneInputStreamOperator<RowData, RowData> localAggOperator =
            new LocalSlicingWindowAggOperator(keySelector, assigner, bufferFactory, shiftTimeZone);

    return ExecNodeUtil.createOneInputTransformation(
            upstream,
            createTransformationMeta(LOCAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
            SimpleOperatorFactory.of(localAggOperator),
            InternalTypeInfo.of(getOutputType()),
            upstream.getParallelism(),
            // use less memory here to let the chained head operator can have more memory
            WINDOW_AGG_MEMORY_RATIO / 2);
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) LocalAggCombiner(org.apache.flink.table.runtime.operators.aggregate.window.combines.LocalAggCombiner) RowType(org.apache.flink.table.types.logical.RowType) WindowBuffer(org.apache.flink.table.runtime.operators.aggregate.window.buffers.WindowBuffer) RecordsWindowBuffer(org.apache.flink.table.runtime.operators.aggregate.window.buffers.RecordsWindowBuffer) RowData(org.apache.flink.table.data.RowData) PagedTypeSerializer(org.apache.flink.table.runtime.typeutils.PagedTypeSerializer) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) LocalSlicingWindowAggOperator(org.apache.flink.table.runtime.operators.aggregate.window.LocalSlicingWindowAggOperator) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer) AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer)

Example 4 with AbstractRowDataSerializer

use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.

The following example is from the class BinaryExternalSorterTest, method testSpillingRandom:

@Test
public void testSpillingRandom() throws Exception {
    // Record count; combined with only ~10% of managed memory below, this
    // forces the sorter to spill to disk.
    int size = 1000_000;
    MockBinaryRowReader reader = new MockBinaryRowReader(size);
    LOG.debug("initializing sortmerger");
    long minMemorySize = memoryManager.computeNumberOfPages(0.1) * MemoryManager.DEFAULT_PAGE_SIZE;
    BinaryExternalSorter sorter =
            new BinaryExternalSorter(
                    new Object(),
                    this.memoryManager,
                    minMemorySize,
                    this.ioManager,
                    (AbstractRowDataSerializer) serializer,
                    serializer,
                    IntNormalizedKeyComputer.INSTANCE,
                    IntRecordComparator.INSTANCE,
                    conf,
                    0.7f);
    // try/finally so sorter memory and threads are released even when an
    // assertion fails (the original leaked the sorter on failure).
    try {
        sorter.startThreads();
        // Generate the records, then shuffle so write order is random.
        List<BinaryRowData> data = new ArrayList<>(size);
        BinaryRowData row = serializer.createInstance();
        for (int i = 0; i < size; i++) {
            row = reader.next(row);
            data.add(row.copy());
        }
        Collections.shuffle(data);
        for (BinaryRowData record : data) {
            sorter.write(record);
        }
        // The sorter must return records ascending by the int key (field 0);
        // field 1 carries the matching payload string.
        MutableObjectIterator<BinaryRowData> iterator = sorter.getIterator();
        data.sort(Comparator.comparingInt(o -> o.getInt(0)));
        BinaryRowData next = serializer.createInstance();
        for (int i = 0; i < size; i++) {
            next = iterator.next(next);
            Assert.assertEquals(data.get(i).getInt(0), next.getInt(0));
            Assert.assertEquals(data.get(i).getString(1), next.getString(1));
        }
    } finally {
        sorter.close();
    }
}
Also used : Arrays(java.util.Arrays) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RunWith(org.junit.runner.RunWith) LoggerFactory(org.slf4j.LoggerFactory) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) ArrayList(java.util.ArrayList) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) After(org.junit.After) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) Parameterized(org.junit.runners.Parameterized) MemoryManagerBuilder(org.apache.flink.runtime.memory.MemoryManagerBuilder) Before(org.junit.Before) RowData(org.apache.flink.table.data.RowData) Logger(org.slf4j.Logger) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) StringData(org.apache.flink.table.data.StringData) List(java.util.List) Assert(org.junit.Assert) Comparator(java.util.Comparator) ExecutionConfigOptions(org.apache.flink.table.api.config.ExecutionConfigOptions) Collections(java.util.Collections) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 5 with AbstractRowDataSerializer

use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.

The following example is from the class HashJoinOperator, method open:

@Override
public void open() throws Exception {
    super.open();

    // Resolve the user-code class loader once and reuse it below (the
    // original re-fetched it via getUserCodeClassloader() for both
    // serializer lookups, which was redundant).
    ClassLoader cl = getContainingTask().getUserCodeClassLoader();
    final AbstractRowDataSerializer buildSerializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    final AbstractRowDataSerializer probeSerializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(cl);

    boolean hashJoinUseBitMaps =
            getContainingTask()
                    .getEnvironment()
                    .getTaskConfiguration()
                    .getBoolean(AlgorithmOptions.HASH_JOIN_BLOOM_FILTERS);
    int parallel = getRuntimeContext().getNumberOfParallelSubtasks();

    // Instantiate and open the code-generated join condition.
    this.condition = parameter.condFuncCode.newInstance(cl);
    condition.setRuntimeContext(getRuntimeContext());
    condition.open(new Configuration());

    this.table =
            new BinaryHashTable(
                    getContainingTask().getJobConfiguration(),
                    getContainingTask(),
                    buildSerializer,
                    probeSerializer,
                    parameter.buildProjectionCode.newInstance(cl),
                    parameter.probeProjectionCode.newInstance(cl),
                    getContainingTask().getEnvironment().getMemoryManager(),
                    computeMemorySize(),
                    getContainingTask().getEnvironment().getIOManager(),
                    parameter.buildRowSize,
                    parameter.buildRowCount / parallel,
                    hashJoinUseBitMaps,
                    type,
                    condition,
                    reverseJoinFunction,
                    parameter.filterNullKeys,
                    parameter.tryDistinctBuildRow);

    this.collector = new StreamRecordCollector<>(output);
    this.buildSideNullRow = new GenericRowData(buildSerializer.getArity());
    this.probeSideNullRow = new GenericRowData(probeSerializer.getArity());
    this.joinedRow = new JoinedRowData();
    this.buildEnd = false;

    // Register the hash-table metrics.
    getMetricGroup().gauge("memoryUsedSizeInBytes", table::getUsedMemoryInBytes);
    getMetricGroup().gauge("numSpillFiles", table::getNumSpillFiles);
    getMetricGroup().gauge("spillInBytes", table::getSpillInBytes);

    // Drop references to generated code so it can be garbage collected.
    parameter.condFuncCode = null;
    parameter.buildProjectionCode = null;
    parameter.probeProjectionCode = null;
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) Configuration(org.apache.flink.configuration.Configuration) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryHashTable(org.apache.flink.table.runtime.hashtable.BinaryHashTable) GenericRowData(org.apache.flink.table.data.GenericRowData) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable)

Aggregations

AbstractRowDataSerializer (org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer)5 Configuration (org.apache.flink.configuration.Configuration)3 BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)3 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)2 GenericRowData (org.apache.flink.table.data.GenericRowData)2 RowData (org.apache.flink.table.data.RowData)2 JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData)2 ZoneId (java.time.ZoneId)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Comparator (java.util.Comparator)1 List (java.util.List)1 Transformation (org.apache.flink.api.dag.Transformation)1 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)1 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)1 IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)1 MemoryManagerBuilder (org.apache.flink.runtime.memory.MemoryManagerBuilder)1 InputSelectable (org.apache.flink.streaming.api.operators.InputSelectable)1