Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.
The class BinaryRowDataTest, method testRawValueData.
@Test
public void testRawValueData() {
    // write two RawValueData fields and one null field into a three-field binary row
    BinaryRowData row = new BinaryRowData(3);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    RawValueDataSerializer<String> binarySerializer =
            new RawValueDataSerializer<>(StringSerializer.INSTANCE);
    RawValueData<String> hahah = RawValueData.fromObject("hahah");
    writer.writeRawValue(0, hahah, binarySerializer);
    writer.setNullAt(1);
    writer.writeRawValue(2, hahah, binarySerializer);
    writer.complete();

    // read the fields back and check that they deserialize to the original value
    RawValueData<String> generic0 = row.getRawValue(0);
    assertThat(generic0, equivalent(hahah, binarySerializer));
    assertTrue(row.isNullAt(1));
    RawValueData<String> generic2 = row.getRawValue(2);
    assertThat(generic2, equivalent(hahah, binarySerializer));
}
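For orientation, the same write-then-complete-then-read pattern applies to fixed-length and string fields. The following is a minimal self-contained sketch, not taken from the Flink test suite, that assumes only the public BinaryRowData / BinaryRowWriter API used above:

import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.data.writer.BinaryRowWriter;

public class BinaryRowDataSketch {

    public static void main(String[] args) {
        // three fields: INT, nullable BIGINT, STRING
        BinaryRowData row = new BinaryRowData(3);
        BinaryRowWriter writer = new BinaryRowWriter(row);
        writer.writeInt(0, 42);
        writer.setNullAt(1);
        writer.writeString(2, StringData.fromString("hello"));
        writer.complete(); // finalizes the null bits and the fixed-length part

        // read the fields back through the RowData accessors
        System.out.println(row.getInt(0));    // 42
        System.out.println(row.isNullAt(1));  // true
        System.out.println(row.getString(2)); // hello
    }
}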
Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.
The class BinaryHashTableTest, method testSpillingHashJoinWithTwoRecursions.
/*
 * This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, except that the number
 * of repeated values (causing bucket collisions) is large enough to make sure that their target partition no longer
 * fits into memory by itself and needs to be repartitioned in the recursion again.
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion level;
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 =
            new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probe1 =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
    MutableObjectIterator<BinaryRowData> probe3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);

    // ----------------------------------------------------------------------------------------

    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
    final BinaryHashTable table =
            newBinaryHashTable(
                    this.buildSideSerializer,
                    this.probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    896 * PAGE_SIZE,
                    ioManager);
    final BinaryRowData recordReuse = new BinaryRowData(2);

    // feed the build side into the hash table
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    // probe, including the spilled partitions, and record the join results per key
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }
    while (table.nextMatching()) {
        testJoin(table, map);
    }
    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals(
                "Wrong number of values in per-key cross product for key " + key,
                (key == repeatedValue1 || key == repeatedValue2)
                        ? (probeValsPerKey + repeatedValueCountProbe)
                                * (buildValsPerKey + repeatedValueCountBuild)
                        : probeValsPerKey * buildValsPerKey,
                val);
    }

    // ----------------------------------------------------------------------------------------
    table.free();
}
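The testJoin helper invoked above is defined elsewhere in BinaryHashTableTest and is not part of this excerpt. As a rough, hedged sketch of what such a validation helper needs to do: for the probe row that just matched, it walks the build-side matches and adds their count to the per-key total in the map. The iterator calls and bookkeeping below are assumptions, not the actual Flink code:

// Hypothetical sketch of the per-probe validation helper (NOT the actual Flink test code).
// Assumes BinaryHashTable exposes getCurrentProbeRow() and getBuildSideIterator().
private void testJoin(BinaryHashTable table, HashMap<Integer, Long> map) throws IOException {
    // key of the probe row that just matched
    final int key = table.getCurrentProbeRow().getInt(0);

    // count the build-side rows joined with this probe row
    long numBuildValues = 0;
    RowIterator<BinaryRowData> buildSide = table.getBuildSideIterator();
    while (buildSide.advanceNext()) {
        numBuildValues++;
    }

    // accumulate the number of joined pairs observed for this key
    Long seen = map.get(key);
    map.put(key, seen == null ? numBuildValues : seen + numBuildValues);
}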
Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.
The class SumHashAggTestOperator, method processElement.
@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
    RowData in1 = element.getValue();

    // project the group key (a single INT column) from the input
    currentKeyWriter.reset();
    if (in1.isNullAt(0)) {
        currentKeyWriter.setNullAt(0);
    } else {
        currentKeyWriter.writeInt(0, in1.getInt(0));
    }
    currentKeyWriter.complete();

    // look up the output buffer using the current group key
    BytesMap.LookupInfo<BinaryRowData, BinaryRowData> lookupInfo = aggregateMap.lookup(currentKey);
    BinaryRowData currentAggBuffer = lookupInfo.getValue();
    if (!lookupInfo.isFound()) {
        // append an empty agg buffer into the aggregate map for the current group key
        try {
            currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
        } catch (EOFException exp) {
            // the hash map ran out of memory: spill its contents to the external sorter
            if (sorter == null) {
                sorter =
                        new BufferedKVExternalSorter(
                                getIOManager(),
                                new BinaryRowDataSerializer(keyTypes.length),
                                new BinaryRowDataSerializer(aggBufferTypes.length),
                                new IntNormalizedKeyComputer(),
                                new IntRecordComparator(),
                                getMemoryManager().getPageSize(),
                                getConf());
            }
            // sort and spill the current map contents
            sorter.sortAndSpill(
                    aggregateMap.getRecordAreaMemorySegments(),
                    aggregateMap.getNumElements(),
                    new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));
            // reset the aggregate map and retry the append
            aggregateMap.reset();
            lookupInfo = aggregateMap.lookup(currentKey);
            try {
                currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
            } catch (EOFException e) {
                throw new OutOfMemoryError("BytesHashMap Out of Memory.");
            }
        }
    }

    // null-aware sum: ignore null inputs, initialize the buffer on the first non-null value
    if (!in1.isNullAt(1)) {
        long sumInput = in1.getLong(1);
        if (currentAggBuffer.isNullAt(0)) {
            currentAggBuffer.setLong(0, sumInput);
        } else {
            currentAggBuffer.setLong(0, sumInput + currentAggBuffer.getLong(0));
        }
    }
}
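To make the expected record layout concrete, here is a hedged usage sketch for this single INT key / BIGINT sum setup. The operator wiring (constructor arguments, test harness) is omitted and the helper name is hypothetical; only the shape of the records fed into processElement is illustrated:

import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;

// Hypothetical driver; assumes an already-configured SumHashAggTestOperator instance.
static void feedSampleRecords(SumHashAggTestOperator op) throws Exception {
    // each record is (INT group key, BIGINT value to sum)
    op.processElement(new StreamRecord<RowData>(GenericRowData.of(1, 10L)));
    op.processElement(new StreamRecord<RowData>(GenericRowData.of(1, 5L)));   // same key: buffer becomes 15
    op.processElement(new StreamRecord<RowData>(GenericRowData.of(2, null))); // null value: ignored by the sum
}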
Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.
The class NestedRowDataTest, method getBinaryRowData.
private BinaryRowData getBinaryRowData() {
    BinaryRowData row = new BinaryRowData(1);
    BinaryRowWriter writer = new BinaryRowWriter(row);

    GenericTypeInfo<MyObj> info = new GenericTypeInfo<>(MyObj.class);
    TypeSerializer<MyObj> genericSerializer = info.createSerializer(new ExecutionConfig());

    // build a five-field generic row: INT, BIGINT, STRING, null STRING, RAW(MyObj)
    GenericRowData gRow = new GenericRowData(5);
    gRow.setField(0, 1);
    gRow.setField(1, 5L);
    gRow.setField(2, StringData.fromString("12345678"));
    gRow.setField(3, null);
    gRow.setField(4, RawValueData.fromObject(new MyObj(15, 5)));

    RowDataSerializer serializer =
            new RowDataSerializer(
                    new LogicalType[] {
                        DataTypes.INT().getLogicalType(),
                        DataTypes.BIGINT().getLogicalType(),
                        DataTypes.STRING().getLogicalType(),
                        DataTypes.STRING().getLogicalType(),
                        DataTypes.RAW(info.getTypeClass(), genericSerializer).getLogicalType()
                    },
                    new TypeSerializer[] {
                        IntSerializer.INSTANCE,
                        LongSerializer.INSTANCE,
                        StringDataSerializer.INSTANCE,
                        StringDataSerializer.INSTANCE,
                        new RawValueDataSerializer<>(genericSerializer)
                    });

    // nest the generic row into field 0 of the outer binary row
    writer.writeRow(0, gRow, serializer);
    writer.complete();
    return row;
}
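For reference, reading the nested fields back out of the row produced above would look roughly like the following. This is a sketch based on the RowData accessor API, assumed to run inside the same test class, and the expected values simply mirror the fields written in getBinaryRowData:

// Read the nested row back: field 0 of the outer row holds a 5-field row.
BinaryRowData row = getBinaryRowData();
RowData nested = row.getRow(0, 5);

int i = nested.getInt(0);                        // 1
long l = nested.getLong(1);                      // 5L
StringData s = nested.getString(2);              // "12345678"
boolean isNull = nested.isNullAt(3);             // true
RawValueData<MyObj> raw = nested.getRawValue(4); // deserialized via the RawValueDataSerializer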
Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.
The class NestedRowDataTest, method testNestInNestedRowData.
@Test
public void testNestInNestedRowData() {
    // layer 1: a generic row that will be nested twice
    GenericRowData gRow = new GenericRowData(4);
    gRow.setField(0, 1);
    gRow.setField(1, 5L);
    gRow.setField(2, StringData.fromString("12345678"));
    gRow.setField(3, null);

    // layer 2: write the generic row into field 1 of a binary row
    RowDataSerializer serializer =
            new RowDataSerializer(
                    new LogicalType[] {
                        DataTypes.INT().getLogicalType(),
                        DataTypes.BIGINT().getLogicalType(),
                        DataTypes.STRING().getLogicalType(),
                        DataTypes.STRING().getLogicalType()
                    },
                    new TypeSerializer[] {
                        IntSerializer.INSTANCE,
                        LongSerializer.INSTANCE,
                        StringDataSerializer.INSTANCE,
                        StringDataSerializer.INSTANCE
                    });
    BinaryRowData row = new BinaryRowData(2);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    writer.writeString(0, StringData.fromString("hahahahafff"));
    writer.writeRow(1, gRow, serializer);
    writer.complete();

    // layer 3: nest the binary row once more; the serializer may be null here because
    // `row` is already in binary format, so its segments are copied directly
    BinaryRowData row2 = new BinaryRowData(1);
    BinaryRowWriter writer2 = new BinaryRowWriter(row2);
    writer2.writeRow(0, row, null);
    writer2.complete();

    // verify: the nested row points at the same bytes as the original binary row
    {
        NestedRowData nestedRow = (NestedRowData) row2.getRow(0, 2);
        BinaryRowData binaryRow = new BinaryRowData(2);
        binaryRow.pointTo(nestedRow.getSegments(), nestedRow.getOffset(), nestedRow.getSizeInBytes());
        assertEquals(binaryRow, row);
    }

    assertEquals(row2.getRow(0, 2).getString(0), StringData.fromString("hahahahafff"));
    RowData nestedRow = row2.getRow(0, 2).getRow(1, 4);
    assertEquals(nestedRow.getInt(0), 1);
    assertEquals(nestedRow.getLong(1), 5L);
    assertEquals(nestedRow.getString(2), StringData.fromString("12345678"));
    assertTrue(nestedRow.isNullAt(3));
}
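The verify block above reuses the nested row's memory segments via pointTo, which gives a zero-copy view. When the nested data needs to outlive the outer row (for example when the outer buffer is reused), a copy can be materialized instead. A minimal sketch, assuming the RowDataSerializer#toBinaryRow copy path and the same 4-field `serializer` and `row2` from the test above:

// Sketch: materialize an independent copy of the nested row (field 1 of the layer-2 row).
RowData nestedView = row2.getRow(0, 2).getRow(1, 4);              // zero-copy view into row2's segments
BinaryRowData copied = serializer.toBinaryRow(nestedView).copy(); // independent binary row with its own segments
assertEquals(copied.getString(2), StringData.fromString("12345678"));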