Search in sources :

Example 11 with BinaryRowDataSerializer

Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in the Apache Flink project.

From the class BinaryRowDataTest, method testGenericObject:

/**
 * Writes one int and three raw {@code MyObj} values into a four-field binary row, verifies the
 * row, then serializes it to 64-byte pages and verifies the row mapped back from those pages.
 */
@Test
public void testGenericObject() throws Exception {
    final int arity = 4;
    final int pageSize = 64;
    final int pageCount = 3;

    GenericTypeInfo<MyObj> typeInfo = new GenericTypeInfo<>(MyObj.class);
    TypeSerializer<MyObj> objSerializer = typeInfo.createSerializer(new ExecutionConfig());
    RawValueDataSerializer<MyObj> rawSerializer = new RawValueDataSerializer<>(objSerializer);

    // Field 0 holds an int; fields 1..3 hold the raw objects below, in this order.
    MyObj[] rawFields = {new MyObj(0, 1), new MyObj(123, 5.0), new MyObj(1, 1)};

    BinaryRowData source = new BinaryRowData(arity);
    BinaryRowWriter rowWriter = new BinaryRowWriter(source);
    rowWriter.writeInt(0, 0);
    for (int idx = 0; idx < rawFields.length; idx++) {
        RawValueData<MyObj> wrapped = RawValueData.fromObject(rawFields[idx]);
        rowWriter.writeRawValue(idx + 1, wrapped, rawSerializer);
    }
    rowWriter.complete();
    assertTestGenericObjectRow(source, objSerializer);

    // Round trip: the same segments back both the output view (array) and input view (list).
    MemorySegment[] pages = new MemorySegment[pageCount];
    ArrayList<MemorySegment> pageList = new ArrayList<>();
    for (int page = 0; page < pageCount; page++) {
        MemorySegment segment = MemorySegmentFactory.wrap(new byte[pageSize]);
        pages[page] = segment;
        pageList.add(segment);
    }

    BinaryRowDataSerializer rowSerializer = new BinaryRowDataSerializer(arity);
    rowSerializer.serializeToPages(source, new RandomAccessOutputView(pages, pageSize));

    BinaryRowData restored =
            rowSerializer.mapFromPages(
                    rowSerializer.createInstance(),
                    new RandomAccessInputView(pageList, pageSize));
    assertTestGenericObjectRow(restored, objSerializer);
}
Also used : RandomAccessInputView(org.apache.flink.runtime.io.disk.RandomAccessInputView) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) RandomAccessOutputView(org.apache.flink.runtime.io.disk.RandomAccessOutputView) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) MemorySegment(org.apache.flink.core.memory.MemorySegment) RawValueDataSerializer(org.apache.flink.table.runtime.typeutils.RawValueDataSerializer) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) MyObj(org.apache.flink.table.data.util.DataFormatTestUtil.MyObj) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) Test(org.junit.Test)

Example 12 with BinaryRowDataSerializer

Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in the Apache Flink project.

From the class BinaryRowDataTest, method testSerializeVariousSize:

/**
 * Exercises {@code testSerialize} for every start offset within a segment and every row size
 * from the fixed-length part up to (but excluding) the largest size computed for that offset.
 */
@Test
public void testSerializeVariousSize() throws IOException {
    final int segmentSize = 64;
    final int segmentCount = 3;
    // Number of bytes used to write the row length ahead of the row itself.
    final int lengthPrefixSize = 4;
    // A single-field row has a 16-byte fixed-length part (header + 1 field).
    final int fixedPartLength = 16;

    // Single-field row carrying 1 KiB of random binary data.
    byte[] payload = new byte[1024];
    new Random().nextBytes(payload);
    BinaryRowData row = new BinaryRowData(1);
    BinaryRowWriter rowWriter = new BinaryRowWriter(row);
    rowWriter.writeBinary(0, payload);
    rowWriter.complete();

    // Segments plus a reverse lookup from segment to its position in the array.
    MemorySegment[] segments = new MemorySegment[segmentCount];
    Map<MemorySegment, Integer> segmentPositions = new HashMap<>();
    for (int pos = 0; pos < segmentCount; pos++) {
        MemorySegment segment = MemorySegmentFactory.wrap(new byte[segmentSize]);
        segments[pos] = segment;
        segmentPositions.put(segment, pos);
    }

    BinaryRowDataSerializer rowSerializer = new BinaryRowDataSerializer(1);

    for (int startOffset = 0; startOffset < segmentSize; startOffset++) {
        int leftInFirstSegment = segmentSize - startOffset;
        // Largest row that fits when serialization starts at startOffset.
        int capacity = (segmentSize * segmentCount) - startOffset - lengthPrefixSize;
        // If the length prefix and fixed-length part do not fit in what is left of the first
        // segment, serialization starts from the second segment, so that tail is lost.
        boolean fixedPartFits = leftInFirstSegment >= fixedPartLength + lengthPrefixSize;
        int maxRowSize = fixedPartFits ? capacity : capacity - leftInFirstSegment;

        for (int rowSize = fixedPartLength; rowSize < maxRowSize; rowSize++) {
            testSerialize(
                    row, segments, segmentPositions, rowSerializer, startOffset, rowSize);
        }
    }
}
Also used : Random(java.util.Random) HashMap(java.util.HashMap) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) MemorySegment(org.apache.flink.core.memory.MemorySegment) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) Test(org.junit.Test)

Example 13 with BinaryRowDataSerializer

Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in the Apache Flink project.

From the class SumHashAggTestOperator, method processElement:

/**
 * Folds one input record into the hash aggregation.
 *
 * <p>The group key is taken from field 0 of the record (an int, possibly null) and the long in
 * field 1 is added to the sum kept in field 0 of that key's aggregation buffer. When the
 * aggregate map cannot take a new key, its contents are sorted and spilled, the map is reset,
 * and the append is retried once.
 *
 * @param element the stream record wrapping the input row
 * @throws Exception if the lookup, spill or append fails
 * @throws OutOfMemoryError if the append still fails after the map was spilled and reset; the
 *     triggering {@link EOFException} is attached as the cause
 */
@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
    RowData in1 = element.getValue();
    // project key from input
    currentKeyWriter.reset();
    if (in1.isNullAt(0)) {
        currentKeyWriter.setNullAt(0);
    } else {
        currentKeyWriter.writeInt(0, in1.getInt(0));
    }
    currentKeyWriter.complete();
    // look up output buffer using current group key
    BytesMap.LookupInfo<BinaryRowData, BinaryRowData> lookupInfo = aggregateMap.lookup(currentKey);
    BinaryRowData currentAggBuffer = lookupInfo.getValue();
    if (!lookupInfo.isFound()) {
        // append empty agg buffer into aggregate map for current group key
        try {
            currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
        } catch (EOFException exp) {
            // hash map out of memory, spill to external sorter (created on first spill)
            if (sorter == null) {
                sorter = new BufferedKVExternalSorter(getIOManager(), new BinaryRowDataSerializer(keyTypes.length), new BinaryRowDataSerializer(aggBufferTypes.length), new IntNormalizedKeyComputer(), new IntRecordComparator(), getMemoryManager().getPageSize(), getConf());
            }
            // sort and spill
            sorter.sortAndSpill(aggregateMap.getRecordAreaMemorySegments(), aggregateMap.getNumElements(), new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));
            // reset the aggregate map, then look the key up again and retry the append
            aggregateMap.reset();
            lookupInfo = aggregateMap.lookup(currentKey);
            try {
                currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
            } catch (EOFException e) {
                // The freshly reset map still cannot hold the entry. Keep the original
                // failure as the cause so the stack trace shows where the append gave up.
                OutOfMemoryError oom = new OutOfMemoryError("BytesHashMap Out of Memory.");
                oom.initCause(e);
                throw oom;
            }
        }
    }
    // Null inputs do not contribute to the sum.
    if (!in1.isNullAt(1)) {
        long sumInput = in1.getLong(1);
        if (currentAggBuffer.isNullAt(0)) {
            // first non-null value for this key
            currentAggBuffer.setLong(0, sumInput);
        } else {
            currentAggBuffer.setLong(0, sumInput + currentAggBuffer.getLong(0));
        }
    }
}
Also used : IntRecordComparator(org.apache.flink.table.runtime.operators.sort.IntRecordComparator) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) IntNormalizedKeyComputer(org.apache.flink.table.runtime.operators.sort.IntNormalizedKeyComputer) BytesMap(org.apache.flink.table.runtime.util.collections.binary.BytesMap) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) EOFException(java.io.EOFException) BufferedKVExternalSorter(org.apache.flink.table.runtime.operators.sort.BufferedKVExternalSorter) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Example 14 with BinaryRowDataSerializer

Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in the Apache Flink project.

From the class ResettableExternalBufferTest, method testHugeRecord:

/**
 * Expects an {@link IOException} while writing huge records into a buffer whose memory pool is
 * capped at {@code 3 * DEFAULT_PAGE_SIZE / pageSize} pages.
 */
@Test
public void testHugeRecord() throws Exception {
    thrown.expect(IOException.class);

    LazyMemorySegmentPool segmentPool =
            new LazyMemorySegmentPool(
                    this, memManager, 3 * DEFAULT_PAGE_SIZE / memManager.getPageSize());
    BinaryRowDataSerializer singleFieldSerializer = new BinaryRowDataSerializer(1);

    try (ResettableExternalBuffer hugeBuffer =
            new ResettableExternalBuffer(
                    ioManager, segmentPool, singleFieldSerializer, false)) {
        writeHuge(hugeBuffer, 10);
        writeHuge(hugeBuffer, 50000);
    }
}
Also used : BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) Test(org.junit.Test)

Example 15 with BinaryRowDataSerializer

Use of org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer in the Apache Flink project.

From the class SortMergeJoinOperator, method open:

/**
 * Initializes the operator's runtime state.
 *
 * <p>In order, this method: builds a {@link BinaryRowDataSerializer} for each input from the
 * arity of the configured input serializer; splits the computed memory between the external
 * buffer(s) and the two sorters; creates both sorters and starts their threads; instantiates
 * the generated key comparator, join condition and projections; prepares the null rows and the
 * joined row; clears the generated-code holders; and registers three metric gauges.
 *
 * @throws TableException if no memory is left for sorting once the external buffer share has
 *     been subtracted
 * @throws Exception if the parent {@code open()} or any of the initialization steps fails
 */
@Override
public void open() throws Exception {
    super.open();
    Configuration conf = getContainingTask().getJobConfiguration();
    // One "finished" flag per input, both initially false.
    isFinished = new boolean[] { false, false };
    collector = new StreamRecordCollector<>(output);
    ClassLoader cl = getUserCodeClassloader();
    // The binary serializers only need the arity of the configured input serializers.
    AbstractRowDataSerializer inputSerializer1 = (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(cl);
    this.serializer1 = new BinaryRowDataSerializer(inputSerializer1.getArity());
    AbstractRowDataSerializer inputSerializer2 = (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn2(cl);
    this.serializer2 = new BinaryRowDataSerializer(inputSerializer2.getArity());
    this.memManager = this.getContainingTask().getEnvironment().getMemoryManager();
    this.ioManager = this.getContainingTask().getEnvironment().getIOManager();
    long totalMemory = computeMemorySize();
    // The external buffer gets a ratio of the total, but never less than MIN_NUM_MEMORY.
    externalBufferMemory = (long) (totalMemory * externalBufferMemRatio);
    externalBufferMemory = Math.max(externalBufferMemory, ResettableExternalBuffer.MIN_NUM_MEMORY);
    // A FULL join reserves the external buffer share twice; the rest goes to sorting.
    long totalSortMem = totalMemory - (type.equals(FlinkJoinType.FULL) ? externalBufferMemory * 2 : externalBufferMemory);
    if (totalSortMem < 0) {
        throw new TableException("Memory size is too small: " + totalMemory + ", please increase manage memory of task manager.");
    }
    // sorter1: sorts the first input using half of the sort memory
    this.sorter1 = new BinaryExternalSorter(this.getContainingTask(), memManager, totalSortMem / 2, ioManager, inputSerializer1, serializer1, computer1.newInstance(cl), comparator1.newInstance(cl), conf);
    this.sorter1.startThreads();
    // sorter2: sorts the second input using the other half of the sort memory
    this.sorter2 = new BinaryExternalSorter(this.getContainingTask(), memManager, totalSortMem / 2, ioManager, inputSerializer2, serializer2, computer2.newInstance(cl), comparator2.newInstance(cl), conf);
    this.sorter2.startThreads();
    // Instantiate the generated key comparator, join condition and projections.
    keyComparator = genKeyComparator.newInstance(cl);
    this.condFunc = condFuncCode.newInstance(cl);
    condFunc.setRuntimeContext(getRuntimeContext());
    condFunc.open(new Configuration());
    projection1 = projectionCode1.newInstance(cl);
    projection2 = projectionCode2.newInstance(cl);
    // Rows with the arity of each side, plus a reusable joined row.
    // NOTE(review): presumably the null rows pad the missing side in outer joins - confirm.
    this.leftNullRow = new GenericRowData(serializer1.getArity());
    this.rightNullRow = new GenericRowData(serializer2.getArity());
    this.joinedRow = new JoinedRowData();
    // Every generated-code holder has been instantiated above; drop the references.
    // NOTE(review): presumably so the holders can be garbage collected - confirm.
    condFuncCode = null;
    computer1 = null;
    comparator1 = null;
    computer2 = null;
    comparator2 = null;
    projectionCode1 = null;
    projectionCode2 = null;
    genKeyComparator = null;
    // Each gauge reports the sum of the corresponding value from both sorters.
    getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) () -> sorter1.getUsedMemoryInBytes() + sorter2.getUsedMemoryInBytes());
    getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) () -> sorter1.getNumSpillFiles() + sorter2.getNumSpillFiles());
    getMetricGroup().gauge("spillInBytes", (Gauge<Long>) () -> sorter1.getSpillInBytes() + sorter2.getSpillInBytes());
}
Also used : AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) TableException(org.apache.flink.table.api.TableException) Configuration(org.apache.flink.configuration.Configuration) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryExternalSorter(org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter) GenericRowData(org.apache.flink.table.data.GenericRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Aggregations

BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer): 19; BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 9; Before (org.junit.Before): 7; MemorySegment (org.apache.flink.core.memory.MemorySegment): 5; IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync): 5; RowData (org.apache.flink.table.data.RowData): 5; Test (org.junit.Test): 5; ArrayList (java.util.ArrayList): 4; BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter): 4; IntRecordComparator (org.apache.flink.table.runtime.operators.sort.IntRecordComparator): 4; Configuration (org.apache.flink.configuration.Configuration): 3; RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView): 3; RandomAccessOutputView (org.apache.flink.runtime.io.disk.RandomAccessOutputView): 3; GenericRowData (org.apache.flink.table.data.GenericRowData): 3; MyProjection (org.apache.flink.table.runtime.operators.join.Int2HashJoinOperatorTest.MyProjection): 3; LazyMemorySegmentPool (org.apache.flink.table.runtime.util.LazyMemorySegmentPool): 3; ResettableExternalBuffer (org.apache.flink.table.runtime.util.ResettableExternalBuffer): 3; Random (java.util.Random): 2; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 2; JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 2