Use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.
The class SortOperator, method open.
@Override
public void open() throws Exception {
    super.open();
    LOG.info("Opening SortOperator");

    ClassLoader cl = getContainingTask().getUserCodeClassLoader();
    AbstractRowDataSerializer inputSerializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    this.binarySerializer = new BinaryRowDataSerializer(inputSerializer.getArity());

    // Instantiate the generated sort code, then drop the holders so they can be GC'd.
    NormalizedKeyComputer computer = gComputer.newInstance(cl);
    RecordComparator comparator = gComparator.newInstance(cl);
    gComputer = null;
    gComparator = null;

    MemoryManager memManager = getContainingTask().getEnvironment().getMemoryManager();
    this.sorter = new BinaryExternalSorter(
            this.getContainingTask(),
            memManager,
            computeMemorySize(),
            this.getContainingTask().getEnvironment().getIOManager(),
            inputSerializer,
            binarySerializer,
            computer,
            comparator,
            getContainingTask().getJobConfiguration());
    this.sorter.startThreads();

    collector = new StreamRecordCollector<>(output);

    // register the metrics.
    getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) sorter::getUsedMemoryInBytes);
    getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) sorter::getNumSpillFiles);
    getMetricGroup().gauge("spillInBytes", (Gauge<Long>) sorter::getSpillInBytes);
}
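Once open, the operator only has to feed records into the sorter and drain the sorted iterator when the input ends. A minimal sketch of that lifecycle, assuming the standard Flink one-input operator contract; sorter.write(...) and sorter.getIterator() are the same calls exercised in the BinaryExternalSorterTest further down this page:

@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
    // Buffer the record; the sorter spills to disk under memory pressure.
    this.sorter.write(element.getValue());
}

@Override
public void endInput() throws Exception {
    // Drain the fully sorted output and forward it downstream.
    BinaryRowData row = binarySerializer.createInstance();
    MutableObjectIterator<BinaryRowData> iterator = sorter.getIterator();
    while ((row = iterator.next(row)) != null) {
        collector.collect(row);
    }
}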
Use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.
The class SortMergeJoinOperator, method open.
@Override
public void open() throws Exception {
    super.open();

    Configuration conf = getContainingTask().getJobConfiguration();
    isFinished = new boolean[] {false, false};
    collector = new StreamRecordCollector<>(output);

    ClassLoader cl = getUserCodeClassloader();
    AbstractRowDataSerializer inputSerializer1 =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    this.serializer1 = new BinaryRowDataSerializer(inputSerializer1.getArity());
    AbstractRowDataSerializer inputSerializer2 =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(cl);
    this.serializer2 = new BinaryRowDataSerializer(inputSerializer2.getArity());

    this.memManager = this.getContainingTask().getEnvironment().getMemoryManager();
    this.ioManager = this.getContainingTask().getEnvironment().getIOManager();

    // Split the managed memory between the external buffer(s) and the two sorters.
    // A FULL outer join buffers both sides, so it reserves two external buffers.
    long totalMemory = computeMemorySize();
    externalBufferMemory = (long) (totalMemory * externalBufferMemRatio);
    externalBufferMemory = Math.max(externalBufferMemory, ResettableExternalBuffer.MIN_NUM_MEMORY);
    long totalSortMem = totalMemory
            - (type.equals(FlinkJoinType.FULL) ? externalBufferMemory * 2 : externalBufferMemory);
    if (totalSortMem < 0) {
        throw new TableException("Memory size is too small: " + totalMemory
                + ", please increase manage memory of task manager.");
    }

    // sorter1
    this.sorter1 = new BinaryExternalSorter(
            this.getContainingTask(),
            memManager,
            totalSortMem / 2,
            ioManager,
            inputSerializer1,
            serializer1,
            computer1.newInstance(cl),
            comparator1.newInstance(cl),
            conf);
    this.sorter1.startThreads();

    // sorter2
    this.sorter2 = new BinaryExternalSorter(
            this.getContainingTask(),
            memManager,
            totalSortMem / 2,
            ioManager,
            inputSerializer2,
            serializer2,
            computer2.newInstance(cl),
            comparator2.newInstance(cl),
            conf);
    this.sorter2.startThreads();

    keyComparator = genKeyComparator.newInstance(cl);
    this.condFunc = condFuncCode.newInstance(cl);
    condFunc.setRuntimeContext(getRuntimeContext());
    condFunc.open(new Configuration());
    projection1 = projectionCode1.newInstance(cl);
    projection2 = projectionCode2.newInstance(cl);

    // Null rows pad the missing side of non-matching rows in outer joins.
    this.leftNullRow = new GenericRowData(serializer1.getArity());
    this.rightNullRow = new GenericRowData(serializer2.getArity());
    this.joinedRow = new JoinedRowData();

    // Drop the generated code holders once everything is instantiated.
    condFuncCode = null;
    computer1 = null;
    comparator1 = null;
    computer2 = null;
    comparator2 = null;
    projectionCode1 = null;
    projectionCode2 = null;
    genKeyComparator = null;

    getMetricGroup().gauge("memoryUsedSizeInBytes",
            (Gauge<Long>) () -> sorter1.getUsedMemoryInBytes() + sorter2.getUsedMemoryInBytes());
    getMetricGroup().gauge("numSpillFiles",
            (Gauge<Long>) () -> sorter1.getNumSpillFiles() + sorter2.getNumSpillFiles());
    getMetricGroup().gauge("spillInBytes",
            (Gauge<Long>) () -> sorter1.getSpillInBytes() + sorter2.getSpillInBytes());
}
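The memory split above is easiest to see with concrete numbers. A worked example, assuming 100 MB of managed memory, an externalBufferMemRatio of 0.1, and a FULL outer join (all three values are illustrative, not taken from the snippet):

// Illustrative arithmetic only; the ratio and total are hypothetical.
long totalMemory = 100L * 1024 * 1024;                 // 100 MB of managed memory
double externalBufferMemRatio = 0.1;                   // assumed ratio
long externalBufferMemory = (long) (totalMemory * externalBufferMemRatio); // 10 MB
// A FULL outer join buffers both sides, so two buffers are reserved:
long totalSortMem = totalMemory - externalBufferMemory * 2;                // 80 MB
long perSorter = totalSortMem / 2;                     // 40 MB for each BinaryExternalSorter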
Use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.
The class StreamExecLocalWindowAggregate, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(
            windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);
    final AggregateInfoList aggInfoList = AggregateUtil.deriveStreamWindowAggregateInfoList(
            inputRowType,
            JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
            windowing.getWindow(),
            false); // isStateBackendDataViews
    final GeneratedNamespaceAggsHandleFunction<Long> generatedAggsHandler = createAggsHandler(
            sliceAssigner, aggInfoList, config, planner.getRelBuilder(),
            inputRowType.getChildren(), shiftTimeZone);
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    PagedTypeSerializer<RowData> keySer =
            (PagedTypeSerializer<RowData>) selector.getProducedType().toSerializer();
    AbstractRowDataSerializer<RowData> valueSer = new RowDataSerializer(inputRowType);
    WindowBuffer.LocalFactory bufferFactory = new RecordsWindowBuffer.LocalFactory(
            keySer, valueSer, new LocalAggCombiner.Factory(generatedAggsHandler));
    final OneInputStreamOperator<RowData, RowData> localAggOperator =
            new LocalSlicingWindowAggOperator(selector, sliceAssigner, bufferFactory, shiftTimeZone);
    return ExecNodeUtil.createOneInputTransformation(
            inputTransform,
            createTransformationMeta(LOCAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
            SimpleOperatorFactory.of(localAggOperator),
            InternalTypeInfo.of(getOutputType()),
            inputTransform.getParallelism(),
            // use less memory here so that the chained head operator can have more memory
            WINDOW_AGG_MEMORY_RATIO / 2);
}
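The valueSer assignment is where AbstractRowDataSerializer enters this translation: RowDataSerializer is one of its concrete subclasses, and the arity it reports is what the operators above use to size their binary serializers. A minimal, self-contained sketch; the two-field row type is made up for illustration:

import org.apache.flink.table.data.RowData;
import org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer;
import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;

public class RowSerializerSketch {
    public static void main(String[] args) {
        // Hypothetical (INT, VARCHAR(20)) row type.
        RowType rowType = RowType.of(new IntType(), new VarCharType(20));
        AbstractRowDataSerializer<RowData> valueSer = new RowDataSerializer(rowType);
        System.out.println(valueSer.getArity()); // prints 2
    }
}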
Use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.
The class BinaryExternalSorterTest, method testSpillingRandom.
@Test
public void testSpillingRandom() throws Exception {
    int size = 1_000_000;
    MockBinaryRowReader reader = new MockBinaryRowReader(size);
    LOG.debug("initializing sortmerger");

    // Give the sorter only 10% of the pages so that it is forced to spill.
    long minMemorySize = memoryManager.computeNumberOfPages(0.1) * MemoryManager.DEFAULT_PAGE_SIZE;
    BinaryExternalSorter sorter = new BinaryExternalSorter(
            new Object(),
            this.memoryManager,
            minMemorySize,
            this.ioManager,
            (AbstractRowDataSerializer) serializer,
            serializer,
            IntNormalizedKeyComputer.INSTANCE,
            IntRecordComparator.INSTANCE,
            conf,
            0.7f);
    sorter.startThreads();

    // Write the rows in random order.
    List<BinaryRowData> data = new ArrayList<>();
    BinaryRowData row = serializer.createInstance();
    for (int i = 0; i < size; i++) {
        row = reader.next(row);
        data.add(row.copy());
    }
    Collections.shuffle(data);
    for (int i = 0; i < size; i++) {
        sorter.write(data.get(i));
    }

    // The sorter must return the rows ordered by the int key.
    MutableObjectIterator<BinaryRowData> iterator = sorter.getIterator();
    data.sort(Comparator.comparingInt(o -> o.getInt(0)));
    BinaryRowData next = serializer.createInstance();
    for (int i = 0; i < size; i++) {
        next = iterator.next(next);
        Assert.assertEquals(data.get(i).getInt(0), next.getInt(0));
        Assert.assertEquals(data.get(i).getString(1), next.getString(1));
    }
    sorter.close();
}
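MockBinaryRowReader is a test helper whose source is not shown on this page. A hedged reconstruction of what its next() plausibly does, matching the (int key, string payload) layout the assertions check; treat the exact values and field layout as assumptions:

// Hypothetical reconstruction, not the actual Flink test helper.
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.data.writer.BinaryRowWriter;

public class MockBinaryRowReader {
    private final int count;
    private int emitted;

    public MockBinaryRowReader(int count) {
        this.count = count;
    }

    public BinaryRowData next(BinaryRowData reuse) {
        if (emitted >= count) {
            return null; // end of input
        }
        BinaryRowWriter writer = new BinaryRowWriter(reuse);
        writer.writeInt(0, emitted);                                   // sort key
        writer.writeString(1, StringData.fromString("str" + emitted)); // payload
        writer.complete();
        emitted++;
        return reuse;
    }
}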
Use of org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer in project flink by apache.
The class HashJoinOperator, method open.
@Override
public void open() throws Exception {
    super.open();
    ClassLoader cl = getContainingTask().getUserCodeClassLoader();
    final AbstractRowDataSerializer buildSerializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    final AbstractRowDataSerializer probeSerializer =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(cl);
    boolean hashJoinUseBitMaps = getContainingTask().getEnvironment().getTaskConfiguration()
            .getBoolean(AlgorithmOptions.HASH_JOIN_BLOOM_FILTERS);
    int parallel = getRuntimeContext().getNumberOfParallelSubtasks();
    this.condition = parameter.condFuncCode.newInstance(cl);
    condition.setRuntimeContext(getRuntimeContext());
    condition.open(new Configuration());
    this.table = new BinaryHashTable(
            getContainingTask().getJobConfiguration(),
            getContainingTask(),
            buildSerializer,
            probeSerializer,
            parameter.buildProjectionCode.newInstance(cl),
            parameter.probeProjectionCode.newInstance(cl),
            getContainingTask().getEnvironment().getMemoryManager(),
            computeMemorySize(),
            getContainingTask().getEnvironment().getIOManager(),
            parameter.buildRowSize,
            parameter.buildRowCount / parallel,
            hashJoinUseBitMaps,
            type,
            condition,
            reverseJoinFunction,
            parameter.filterNullKeys,
            parameter.tryDistinctBuildRow);
    this.collector = new StreamRecordCollector<>(output);
    // Null rows pad the missing side of non-matching rows in outer joins.
    this.buildSideNullRow = new GenericRowData(buildSerializer.getArity());
    this.probeSideNullRow = new GenericRowData(probeSerializer.getArity());
    this.joinedRow = new JoinedRowData();
    this.buildEnd = false;
    getMetricGroup().gauge("memoryUsedSizeInBytes", table::getUsedMemoryInBytes);
    getMetricGroup().gauge("numSpillFiles", table::getNumSpillFiles);
    getMetricGroup().gauge("spillInBytes", table::getSpillInBytes);
    // Drop the generated code holders once everything is instantiated.
    parameter.condFuncCode = null;
    parameter.buildProjectionCode = null;
    parameter.probeProjectionCode = null;
}
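The two null rows built in open() exist to pad the missing side of non-matching rows in outer joins. A minimal sketch of how that padding is typically emitted; the helper name is hypothetical, and in the real operator the call sites depend on the join type:

// Hypothetical helper illustrating the intended use of buildSideNullRow and joinedRow.
private void padBuildSide(RowData probeRow) {
    // JoinedRowData.replace(left, right) reuses a single wrapper object per output row.
    collector.collect(joinedRow.replace(buildSideNullRow, probeRow));
}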