Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in project flink by apache.
From the class BinaryRowDataTest, method testGenericObject:
@Test
public void testGenericObject() throws Exception {
    GenericTypeInfo<MyObj> info = new GenericTypeInfo<>(MyObj.class);
    TypeSerializer<MyObj> genericSerializer = info.createSerializer(new ExecutionConfig());
    RawValueDataSerializer<MyObj> binarySerializer =
            new RawValueDataSerializer<>(genericSerializer);

    BinaryRowData row = new BinaryRowData(4);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    writer.writeInt(0, 0);

    RawValueData<MyObj> myObj1 = RawValueData.fromObject(new MyObj(0, 1));
    writer.writeRawValue(1, myObj1, binarySerializer);
    RawValueData<MyObj> myObj2 = RawValueData.fromObject(new MyObj(123, 5.0));
    writer.writeRawValue(2, myObj2, binarySerializer);
    RawValueData<MyObj> myObj3 = RawValueData.fromObject(new MyObj(1, 1));
    writer.writeRawValue(3, myObj3, binarySerializer);
    writer.complete();

    assertTestGenericObjectRow(row, genericSerializer);

    // getBytes from var-length memorySegments.
    BinaryRowDataSerializer serializer = new BinaryRowDataSerializer(4);
    MemorySegment[] memorySegments = new MemorySegment[3];
    ArrayList<MemorySegment> memorySegmentList = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        memorySegments[i] = MemorySegmentFactory.wrap(new byte[64]);
        memorySegmentList.add(memorySegments[i]);
    }
    RandomAccessOutputView out = new RandomAccessOutputView(memorySegments, 64);
    serializer.serializeToPages(row, out);

    BinaryRowData mapRow = serializer.createInstance();
    mapRow = serializer.mapFromPages(mapRow, new RandomAccessInputView(memorySegmentList, 64));
    assertTestGenericObjectRow(mapRow, genericSerializer);
}
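The MyObj class and the assertTestGenericObjectRow helper belong to BinaryRowDataTest and are not reproduced above. A minimal sketch of what the assertion helper could look like, assuming MyObj carries an int and a double and implements equals(); this is an illustration of the check, not the exact upstream helper:

// hypothetical sketch: read the raw-value fields back and compare with the originals
private void assertTestGenericObjectRow(BinaryRowData row, TypeSerializer<MyObj> serializer) {
    assertEquals(0, row.getInt(0));
    RawValueData<MyObj> rawValue1 = row.getRawValue(1);
    RawValueData<MyObj> rawValue2 = row.getRawValue(2);
    RawValueData<MyObj> rawValue3 = row.getRawValue(3);
    assertEquals(new MyObj(0, 1), rawValue1.toObject(serializer));
    assertEquals(new MyObj(123, 5.0), rawValue2.toObject(serializer));
    assertEquals(new MyObj(1, 1), rawValue3.toObject(serializer));
}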
Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in project flink by apache.
From the class BinaryRowDataTest, method testSerializeVariousSize:
@Test
public void testSerializeVariousSize() throws IOException {
    // in this test, we start serializing from the i-th byte (i in 0...`segSize`)
    // and the size of the row we serialize is j bytes
    // (j in `rowFixLength` to the maximum length we can write)
    int segSize = 64;
    int segTotalNumber = 3;

    BinaryRowData row = new BinaryRowData(1);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    Random random = new Random();
    byte[] bytes = new byte[1024];
    random.nextBytes(bytes);
    writer.writeBinary(0, bytes);
    writer.complete();

    MemorySegment[] memorySegments = new MemorySegment[segTotalNumber];
    Map<MemorySegment, Integer> msIndex = new HashMap<>();
    for (int i = 0; i < segTotalNumber; i++) {
        memorySegments[i] = MemorySegmentFactory.wrap(new byte[segSize]);
        msIndex.put(memorySegments[i], i);
    }

    BinaryRowDataSerializer serializer = new BinaryRowDataSerializer(1);
    int rowSizeInt = 4;
    // note that as there is only one field in the row, the fixed-length part is 16 bytes
    // (8-byte null-bit header + one 8-byte field slot)
    int rowFixLength = 16;
    for (int i = 0; i < segSize; i++) {
        // this is the maximum row size we can serialize
        // if we start serializing from the i-th byte of the output view
        int maxRowSize = (segSize * segTotalNumber) - i - rowSizeInt;
        if (segSize - i < rowFixLength + rowSizeInt) {
            // the whole fixed-length part does not fit into the first segment because
            // the remaining space is too small, so serialization has to start from the
            // second segment; when serializing, the length of the row is written first,
            // then the fixed-length part of the row.
            maxRowSize -= segSize - i;
        }
        for (int j = rowFixLength; j < maxRowSize; j++) {
            // now serialize a row of j bytes starting at offset i
            testSerialize(row, memorySegments, msIndex, serializer, i, j);
        }
    }
}
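The testSerialize helper invoked in the inner loop is part of BinaryRowDataTest and is not shown here. A minimal sketch of what such a helper could do, assuming it writes the row into a paged output view starting at the given offset and reads it back for comparison; the size truncation via pointTo and the offset handling via skipBytesToWrite/skipBytesToRead are assumptions of this sketch, not the exact upstream implementation:

// hypothetical sketch of the round-trip helper; msIndex (segment -> index) is used by the
// real helper to locate where writing ended and is unused in this simplified version
private void testSerialize(BinaryRowData row, MemorySegment[] memorySegments,
        Map<MemorySegment, Integer> msIndex, BinaryRowDataSerializer serializer,
        int position, int rowSize) throws IOException {
    // restrict the logical size of the row to `rowSize` bytes for this round
    row.pointTo(row.getSegments(), row.getOffset(), rowSize);
    // start writing at the `position`-th byte of the first segment
    RandomAccessOutputView out = new RandomAccessOutputView(memorySegments, 64);
    out.skipBytesToWrite(position);
    serializer.serializeToPages(row, out);
    // read the row back from the same offset and compare it with the original
    RandomAccessInputView in =
            new RandomAccessInputView(new ArrayList<>(Arrays.asList(memorySegments)), 64);
    in.skipBytesToRead(position);
    BinaryRowData deserialized = serializer.mapFromPages(serializer.createInstance(), in);
    assertEquals(row, deserialized);
}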
Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in project flink by apache.
From the class SumHashAggTestOperator, method processElement:
@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
    RowData in1 = element.getValue();
    // project the key from the input
    currentKeyWriter.reset();
    if (in1.isNullAt(0)) {
        currentKeyWriter.setNullAt(0);
    } else {
        currentKeyWriter.writeInt(0, in1.getInt(0));
    }
    currentKeyWriter.complete();

    // look up the output buffer using the current group key
    BytesMap.LookupInfo<BinaryRowData, BinaryRowData> lookupInfo = aggregateMap.lookup(currentKey);
    BinaryRowData currentAggBuffer = lookupInfo.getValue();
    if (!lookupInfo.isFound()) {
        // append an empty agg buffer into the aggregate map for the current group key
        try {
            currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
        } catch (EOFException exp) {
            // the hash map ran out of memory, so spill to the external sorter
            if (sorter == null) {
                sorter = new BufferedKVExternalSorter(
                        getIOManager(),
                        new BinaryRowDataSerializer(keyTypes.length),
                        new BinaryRowDataSerializer(aggBufferTypes.length),
                        new IntNormalizedKeyComputer(),
                        new IntRecordComparator(),
                        getMemoryManager().getPageSize(),
                        getConf());
            }
            // sort and spill
            sorter.sortAndSpill(
                    aggregateMap.getRecordAreaMemorySegments(),
                    aggregateMap.getNumElements(),
                    new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));
            // reset the aggregate map and retry the append
            aggregateMap.reset();
            lookupInfo = aggregateMap.lookup(currentKey);
            try {
                currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
            } catch (EOFException e) {
                throw new OutOfMemoryError("BytesHashMap Out of Memory.");
            }
        }
    }

    if (!in1.isNullAt(1)) {
        long sumInput = in1.getLong(1);
        if (currentAggBuffer.isNullAt(0)) {
            currentAggBuffer.setLong(0, sumInput);
        } else {
            currentAggBuffer.setLong(0, sumInput + currentAggBuffer.getLong(0));
        }
    }
}
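currentKey, currentKeyWriter, emptyAggBuffer and aggregateMap are fields of SumHashAggTestOperator that are initialized elsewhere (typically in open()) and are not shown above. A minimal sketch of how the reusable key row and the empty aggregate buffer could be prepared, assuming a single INT group key and a single nullable BIGINT sum buffer; the field names mirror the method above, but the exact upstream initialization is not reproduced:

// hypothetical setup sketch, e.g. in open()
currentKey = new BinaryRowData(1);                      // one INT group-key field
currentKeyWriter = new BinaryRowWriter(currentKey);

emptyAggBuffer = new BinaryRowData(1);                  // one nullable BIGINT sum field
BinaryRowWriter emptyAggBufferWriter = new BinaryRowWriter(emptyAggBuffer);
emptyAggBufferWriter.setNullAt(0);                      // the sum starts as NULL for a new group
emptyAggBufferWriter.complete();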
Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in project flink by apache.
From the class ResettableExternalBufferTest, method testHugeRecord:
@Test
public void testHugeRecord() throws Exception {
    thrown.expect(IOException.class);
    try (ResettableExternalBuffer buffer =
            new ResettableExternalBuffer(
                    ioManager,
                    new LazyMemorySegmentPool(this, memManager, 3 * DEFAULT_PAGE_SIZE / memManager.getPageSize()),
                    new BinaryRowDataSerializer(1),
                    false)) {
        writeHuge(buffer, 10);
        writeHuge(buffer, 50000);
    }
}
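writeHuge is a small helper of ResettableExternalBufferTest that is not shown above; its role is to add a row whose single variable-length field is large enough that the second call exceeds what the buffer can handle, so the expected IOException is raised. A minimal sketch, assuming `size` is simply the number of bytes written into the binary field (the real helper may scale the payload or build it differently):

// hypothetical sketch of the helper used above
private void writeHuge(ResettableExternalBuffer buffer, int size) throws IOException {
    BinaryRowData row = new BinaryRowData(1);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    writer.reset();
    byte[] bytes = new byte[size];
    new Random().nextBytes(bytes);
    writer.writeBinary(0, bytes);
    writer.complete();
    buffer.add(row);
}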
Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in project flink by apache.
From the class SortMergeJoinOperator, method open:
@Override
public void open() throws Exception {
    super.open();
    Configuration conf = getContainingTask().getJobConfiguration();
    isFinished = new boolean[] {false, false};
    collector = new StreamRecordCollector<>(output);

    ClassLoader cl = getUserCodeClassloader();
    AbstractRowDataSerializer inputSerializer1 =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    this.serializer1 = new BinaryRowDataSerializer(inputSerializer1.getArity());
    AbstractRowDataSerializer inputSerializer2 =
            (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(cl);
    this.serializer2 = new BinaryRowDataSerializer(inputSerializer2.getArity());

    this.memManager = this.getContainingTask().getEnvironment().getMemoryManager();
    this.ioManager = this.getContainingTask().getEnvironment().getIOManager();

    long totalMemory = computeMemorySize();
    externalBufferMemory = (long) (totalMemory * externalBufferMemRatio);
    externalBufferMemory = Math.max(externalBufferMemory, ResettableExternalBuffer.MIN_NUM_MEMORY);
    long totalSortMem =
            totalMemory - (type.equals(FlinkJoinType.FULL) ? externalBufferMemory * 2 : externalBufferMemory);
    if (totalSortMem < 0) {
        throw new TableException(
                "Memory size is too small: " + totalMemory
                        + ", please increase manage memory of task manager.");
    }

    // sorter1
    this.sorter1 = new BinaryExternalSorter(
            this.getContainingTask(), memManager, totalSortMem / 2, ioManager,
            inputSerializer1, serializer1, computer1.newInstance(cl), comparator1.newInstance(cl), conf);
    this.sorter1.startThreads();
    // sorter2
    this.sorter2 = new BinaryExternalSorter(
            this.getContainingTask(), memManager, totalSortMem / 2, ioManager,
            inputSerializer2, serializer2, computer2.newInstance(cl), comparator2.newInstance(cl), conf);
    this.sorter2.startThreads();

    keyComparator = genKeyComparator.newInstance(cl);
    this.condFunc = condFuncCode.newInstance(cl);
    condFunc.setRuntimeContext(getRuntimeContext());
    condFunc.open(new Configuration());

    projection1 = projectionCode1.newInstance(cl);
    projection2 = projectionCode2.newInstance(cl);

    this.leftNullRow = new GenericRowData(serializer1.getArity());
    this.rightNullRow = new GenericRowData(serializer2.getArity());
    this.joinedRow = new JoinedRowData();

    // the generated-code holders are no longer needed once instantiated; release the references
    condFuncCode = null;
    computer1 = null;
    comparator1 = null;
    computer2 = null;
    comparator2 = null;
    projectionCode1 = null;
    projectionCode2 = null;
    genKeyComparator = null;

    getMetricGroup().gauge(
            "memoryUsedSizeInBytes",
            (Gauge<Long>) () -> sorter1.getUsedMemoryInBytes() + sorter2.getUsedMemoryInBytes());
    getMetricGroup().gauge(
            "numSpillFiles",
            (Gauge<Long>) () -> sorter1.getNumSpillFiles() + sorter2.getNumSpillFiles());
    getMetricGroup().gauge(
            "spillInBytes",
            (Gauge<Long>) () -> sorter1.getSpillInBytes() + sorter2.getSpillInBytes());
}
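In open() above, each BinaryRowDataSerializer is created purely from the arity of the corresponding input serializer; it (de)serializes rows in their binary form and is handed to the two BinaryExternalSorters. A minimal, self-contained sketch of such a round trip outside the operator, assuming a two-field row (the arity value 2 and the field values are illustrative only):

// standalone round trip through the standard TypeSerializer API; the arity must match the row
BinaryRowDataSerializer serializer = new BinaryRowDataSerializer(2);
BinaryRowData row = new BinaryRowData(2);
BinaryRowWriter writer = new BinaryRowWriter(row);
writer.writeInt(0, 42);
writer.writeLong(1, 4242L);
writer.complete();

DataOutputSerializer out = new DataOutputSerializer(64);
serializer.serialize(row, out);
BinaryRowData copy = serializer.deserialize(new DataInputDeserializer(out.getCopyOfBuffer()));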