use of org.apache.flink.runtime.io.disk.iomanager.IOManager in project flink by apache.
the class JoinDriver method prepare.
/**
 * Prepares the join for execution: gathers the inputs, serializers and comparators from
 * the task context, instantiates the join iterator matching the configured driver strategy
 * (in its object-reusing or non-reusing flavour) and opens it.
 *
 * @throws Exception if no pair comparator factory is configured, if the driver strategy is
 *         not one of the strategies handled below, or if any setup call (including opening
 *         the iterator) fails
 */
@Override
public void prepare() throws Exception {
    final TaskConfig taskConfig = this.taskContext.getTaskConfig();
    final Counter recordsInCounter =
            this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsInCounter();

    // memory manager and I/O manager as handed out by the task context
    final MemoryManager memMgr = this.taskContext.getMemoryManager();
    final IOManager ioMgr = this.taskContext.getIOManager();

    // memory budget of this driver: relative share and the page count derived from it
    final double memoryFraction = taskConfig.getRelativeMemoryDriver();
    final int pageCount = memMgr.computeNumberOfPages(memoryFraction);

    final DriverStrategy strategy = taskConfig.getDriverStrategy();

    // both inputs are wrapped with the same records-in counter
    final MutableObjectIterator<IT1> input1 =
            new CountingMutableObjectIterator<>(this.taskContext.<IT1>getInput(0), recordsInCounter);
    final MutableObjectIterator<IT2> input2 =
            new CountingMutableObjectIterator<>(this.taskContext.<IT2>getInput(1), recordsInCounter);

    // serializers and key comparators of the two sides
    final TypeSerializer<IT1> ser1 = this.taskContext.<IT1>getInputSerializer(0).getSerializer();
    final TypeSerializer<IT2> ser2 = this.taskContext.<IT2>getInputSerializer(1).getSerializer();
    final TypeComparator<IT1> cmp1 = this.taskContext.getDriverComparator(0);
    final TypeComparator<IT2> cmp2 = this.taskContext.getDriverComparator(1);

    final TypePairComparatorFactory<IT1, IT2> pairCmpFactory =
            taskConfig.getPairComparatorFactory(this.taskContext.getUserCodeClassLoader());
    if (pairCmpFactory == null) {
        throw new Exception("Missing pair comparator factory for join driver");
    }

    final boolean reuseObjects = this.taskContext.getExecutionConfig().isObjectReuseEnabled();
    if (LOG.isDebugEnabled()) {
        final String reuseMode = reuseObjects ? "ENABLED" : "DISABLED";
        LOG.debug("Join Driver object reuse: " + reuseMode + ".");
    }

    // bloom-filter switch for the hash joins, read from the task manager configuration
    final boolean useBloomFilters = this.taskContext
            .getTaskManagerInfo()
            .getConfiguration()
            .getBoolean(
                    ConfigConstants.RUNTIME_HASH_JOIN_BLOOM_FILTERS_KEY,
                    ConfigConstants.DEFAULT_RUNTIME_HASH_JOIN_BLOOM_FILTERS);

    // pick the iterator by strategy first, then by object-reuse mode
    switch (strategy) {
        case INNER_MERGE:
            if (reuseObjects) {
                this.joinIterator = new ReusingMergeInnerJoinIterator<>(
                        input1, input2,
                        ser1, cmp1,
                        ser2, cmp2,
                        pairCmpFactory.createComparator12(cmp1, cmp2),
                        memMgr, ioMgr, pageCount,
                        this.taskContext.getContainingTask());
            } else {
                this.joinIterator = new NonReusingMergeInnerJoinIterator<>(
                        input1, input2,
                        ser1, cmp1,
                        ser2, cmp2,
                        pairCmpFactory.createComparator12(cmp1, cmp2),
                        memMgr, ioMgr, pageCount,
                        this.taskContext.getContainingTask());
            }
            break;

        case HYBRIDHASH_BUILD_FIRST:
            // note: this strategy uses the 2-1 pair comparator
            if (reuseObjects) {
                this.joinIterator = new ReusingBuildFirstHashJoinIterator<>(
                        input1, input2,
                        ser1, cmp1,
                        ser2, cmp2,
                        pairCmpFactory.createComparator21(cmp1, cmp2),
                        memMgr, ioMgr,
                        this.taskContext.getContainingTask(),
                        memoryFraction, false, false, useBloomFilters);
            } else {
                this.joinIterator = new NonReusingBuildFirstHashJoinIterator<>(
                        input1, input2,
                        ser1, cmp1,
                        ser2, cmp2,
                        pairCmpFactory.createComparator21(cmp1, cmp2),
                        memMgr, ioMgr,
                        this.taskContext.getContainingTask(),
                        memoryFraction, false, false, useBloomFilters);
            }
            break;

        case HYBRIDHASH_BUILD_SECOND:
            if (reuseObjects) {
                this.joinIterator = new ReusingBuildSecondHashJoinIterator<>(
                        input1, input2,
                        ser1, cmp1,
                        ser2, cmp2,
                        pairCmpFactory.createComparator12(cmp1, cmp2),
                        memMgr, ioMgr,
                        this.taskContext.getContainingTask(),
                        memoryFraction, false, false, useBloomFilters);
            } else {
                this.joinIterator = new NonReusingBuildSecondHashJoinIterator<>(
                        input1, input2,
                        ser1, cmp1,
                        ser2, cmp2,
                        pairCmpFactory.createComparator12(cmp1, cmp2),
                        memMgr, ioMgr,
                        this.taskContext.getContainingTask(),
                        memoryFraction, false, false, useBloomFilters);
            }
            break;

        default:
            throw new Exception("Unsupported driver strategy for join driver: " + strategy.name());
    }

    // opening the iterator triggers the sorting or hash-table building
    // and blocks until the iterator is ready
    this.joinIterator.open();

    if (LOG.isDebugEnabled()) {
        LOG.debug(this.taskContext.formatLogString("join task iterator ready."));
    }
}
use of org.apache.flink.runtime.io.disk.iomanager.IOManager in project flink by apache.
the class LargeRecordHandlerITCase method fileTest.
/**
 * Serializes NUM_RECORDS tuples through a {@code FileChannelOutputView}, recording the write
 * offset before each record, then reads every record back through a
 * {@code SeekableFileChannelInputView} by seeking to its recorded offset and checks that the
 * three tuple fields are still consistent with each other.
 *
 * <p>Any unexpected exception fails the test with the exception attached as cause, so the
 * original stack trace appears in the test report.
 */
@Test
public void fileTest() {
    final IOManager ioMan = new IOManagerAsync();
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 4;
    final int NUM_RECORDS = 10;
    FileIOChannel.ID channel = null;
    try {
        final MemoryManager memMan = new MemoryManager(NUM_PAGES * PAGE_SIZE, 1, PAGE_SIZE, MemoryType.HEAP, true);
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> memory = memMan.allocatePages(owner, NUM_PAGES);

        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class), BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> typeInfo = new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(types);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> serializer = typeInfo.createSerializer(new ExecutionConfig());

        channel = ioMan.createChannel();
        FileChannelOutputView out = new FileChannelOutputView(ioMan.createBlockChannelWriter(channel), memMan, memory, PAGE_SIZE);

        // add the test data, remembering where each record starts
        Random rnd = new Random();
        List<Long> offsets = new ArrayList<Long>();
        for (int i = 0; i < NUM_RECORDS; i++) {
            offsets.add(out.getWriteOffset());
            long val = rnd.nextLong();
            // all three fields are derived from the same random value so they can be cross-checked on read
            Tuple3<Long, SomeVeryLongValue, Byte> next = new Tuple3<Long, SomeVeryLongValue, Byte>(val, new SomeVeryLongValue((int) val), (byte) val);
            serializer.serialize(next, out);
        }
        out.close();

        // the recorded offsets must be strictly increasing
        for (int i = 1; i < offsets.size(); i++) {
            assertTrue(offsets.get(i) > offsets.get(i - 1));
        }

        // acquire NUM_PAGES pages into the same list for the reading side
        // (presumably the output view gave its pages back on close -- TODO confirm)
        memMan.allocatePages(owner, memory, NUM_PAGES);
        SeekableFileChannelInputView in = new SeekableFileChannelInputView(ioMan, channel, memMan, memory, out.getBytesInLatestSegment());

        for (int i = 0; i < NUM_RECORDS; i++) {
            in.seek(offsets.get(i));
            Tuple3<Long, SomeVeryLongValue, Byte> next = serializer.deserialize(in);
            // key and value must be equal
            assertTrue(next.f0.intValue() == next.f1.val());
            assertTrue(next.f0.byteValue() == next.f2);
        }
        in.closeAndDelete();
    } catch (Exception e) {
        // keep the original exception as cause instead of printing it and failing with
        // only its (possibly null) message
        throw new AssertionError("Unexpected exception in fileTest: " + e, e);
    } finally {
        if (channel != null) {
            try {
                ioMan.deleteChannel(channel);
            } catch (IOException ignored) {
                // best-effort cleanup; a failure to delete must not mask the test outcome
            }
        }
        ioMan.shutdown();
    }
}
use of org.apache.flink.runtime.io.disk.iomanager.IOManager in project flink by apache.
the class LargeRecordHandlerITCase method testRecordHandlerCompositeKey.
/**
 * Adds NUM_RECORDS tuples to a {@code LargeRecordHandler} configured with a composite key on
 * fields (2, 0), sorts them and verifies that
 * <ul>
 *   <li>adding records after {@code finishWriteAndSortKeys} or after {@code close} throws
 *       {@code IllegalStateException},</li>
 *   <li>every returned tuple is internally consistent,</li>
 *   <li>tuples come back ordered by field 2, then by field 0,</li>
 *   <li>the memory manager reports empty once the handler is closed.</li>
 * </ul>
 *
 * <p>Any unexpected exception fails the test with the exception attached as cause, so the
 * original stack trace appears in the test report.
 */
@Test
public void testRecordHandlerCompositeKey() {
    final IOManager ioMan = new IOManagerAsync();
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 1000;
    final int NUM_RECORDS = 10;
    try {
        final MemoryManager memMan = new MemoryManager(NUM_PAGES * PAGE_SIZE, 1, PAGE_SIZE, MemoryType.HEAP, true);
        final AbstractInvokable owner = new DummyInvokable();

        // 6 pages go to the handler up front, the remaining pages are used for sorting
        final List<MemorySegment> initialMemory = memMan.allocatePages(owner, 6);
        final List<MemorySegment> sortMemory = memMan.allocatePages(owner, NUM_PAGES - 6);

        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class), BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> typeInfo = new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(types);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        final TypeComparator<Tuple3<Long, SomeVeryLongValue, Byte>> comparator = typeInfo.createComparator(new int[] { 2, 0 }, new boolean[] { true, true }, 0, new ExecutionConfig());

        LargeRecordHandler<Tuple3<Long, SomeVeryLongValue, Byte>> handler = new LargeRecordHandler<Tuple3<Long, SomeVeryLongValue, Byte>>(serializer, comparator, ioMan, memMan, initialMemory, owner, 128);
        assertFalse(handler.hasData());

        // add the test data; all three fields are derived from the same random value
        Random rnd = new Random();
        for (int i = 0; i < NUM_RECORDS; i++) {
            long val = rnd.nextLong();
            handler.addRecord(new Tuple3<Long, SomeVeryLongValue, Byte>(val, new SomeVeryLongValue((int) val), (byte) val));
            assertTrue(handler.hasData());
        }

        MutableObjectIterator<Tuple3<Long, SomeVeryLongValue, Byte>> sorted = handler.finishWriteAndSortKeys(sortMemory);

        // once writing is finished, further records must be rejected
        try {
            handler.addRecord(new Tuple3<Long, SomeVeryLongValue, Byte>(92L, null, (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
            // expected
        }

        Tuple3<Long, SomeVeryLongValue, Byte> previous = null;
        Tuple3<Long, SomeVeryLongValue, Byte> next;
        while ((next = sorted.next(null)) != null) {
            // key and value must be equal
            assertTrue(next.f0.intValue() == next.f1.val());
            assertTrue(next.f0.byteValue() == next.f2);
            // order must be correct: field 2 first, ties broken by field 0
            if (previous != null) {
                assertTrue(previous.f2 <= next.f2);
                assertTrue(previous.f2.byteValue() != next.f2.byteValue() || previous.f0 <= next.f0);
            }
            previous = next;
        }

        handler.close();
        assertFalse(handler.hasData());
        // a second close must be tolerated
        handler.close();

        // a closed handler must reject records as well
        try {
            handler.addRecord(new Tuple3<Long, SomeVeryLongValue, Byte>(92L, null, (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
            // expected
        }

        assertTrue(memMan.verifyEmpty());
    } catch (Exception e) {
        // keep the original exception as cause instead of printing it and failing with
        // only its (possibly null) message
        throw new AssertionError("Unexpected exception in testRecordHandlerCompositeKey: " + e, e);
    } finally {
        ioMan.shutdown();
    }
}
use of org.apache.flink.runtime.io.disk.iomanager.IOManager in project flink by apache.
the class LargeRecordHandlerTest method testRecordHandlerCompositeKey.
/**
 * Adds NUM_RECORDS {@code Tuple3<Long, String, Byte>} records to a {@code LargeRecordHandler}
 * configured with a composite key on fields (2, 0), sorts them and verifies that
 * <ul>
 *   <li>adding records after {@code finishWriteAndSortKeys} or after {@code close} throws
 *       {@code IllegalStateException},</li>
 *   <li>every returned tuple is internally consistent,</li>
 *   <li>tuples come back ordered by field 2, then by field 0,</li>
 *   <li>the memory manager reports empty once the handler is closed.</li>
 * </ul>
 *
 * <p>Any unexpected exception fails the test with the exception attached as cause, so the
 * original stack trace appears in the test report.
 */
@Test
public void testRecordHandlerCompositeKey() {
    final IOManager ioMan = new IOManagerAsync();
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 24;
    final int NUM_RECORDS = 25000;
    try {
        final MemoryManager memMan = new MemoryManager(NUM_PAGES * PAGE_SIZE, 1, PAGE_SIZE, MemoryType.HEAP, true);
        final AbstractInvokable owner = new DummyInvokable();

        // 6 pages go to the handler up front, the remaining pages are used for sorting
        final List<MemorySegment> initialMemory = memMan.allocatePages(owner, 6);
        final List<MemorySegment> sortMemory = memMan.allocatePages(owner, NUM_PAGES - 6);

        final TupleTypeInfo<Tuple3<Long, String, Byte>> typeInfo = (TupleTypeInfo<Tuple3<Long, String, Byte>>) TypeInfoParser.<Tuple3<Long, String, Byte>>parse("Tuple3<Long, String, Byte>");
        final TypeSerializer<Tuple3<Long, String, Byte>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        final TypeComparator<Tuple3<Long, String, Byte>> comparator = typeInfo.createComparator(new int[] { 2, 0 }, new boolean[] { true, true }, 0, new ExecutionConfig());

        LargeRecordHandler<Tuple3<Long, String, Byte>> handler = new LargeRecordHandler<Tuple3<Long, String, Byte>>(serializer, comparator, ioMan, memMan, initialMemory, owner, 128);
        assertFalse(handler.hasData());

        // add the test data; all three fields are derived from the same random value
        Random rnd = new Random();
        for (int i = 0; i < NUM_RECORDS; i++) {
            long val = rnd.nextLong();
            handler.addRecord(new Tuple3<Long, String, Byte>(val, String.valueOf(val), (byte) val));
            assertTrue(handler.hasData());
        }

        MutableObjectIterator<Tuple3<Long, String, Byte>> sorted = handler.finishWriteAndSortKeys(sortMemory);

        // once writing is finished, further records must be rejected
        try {
            handler.addRecord(new Tuple3<Long, String, Byte>(92L, "peter pepper", (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
            // expected
        }

        Tuple3<Long, String, Byte> previous = null;
        Tuple3<Long, String, Byte> next;
        while ((next = sorted.next(null)) != null) {
            // key and value must be equal
            assertTrue(next.f0.equals(Long.parseLong(next.f1)));
            assertTrue(next.f0.byteValue() == next.f2);
            // order must be correct: field 2 first, ties broken by field 0
            if (previous != null) {
                assertTrue(previous.f2 <= next.f2);
                assertTrue(previous.f2.byteValue() != next.f2.byteValue() || previous.f0 <= next.f0);
            }
            previous = next;
        }

        handler.close();
        assertFalse(handler.hasData());
        // a second close must be tolerated
        handler.close();

        // a closed handler must reject records as well
        try {
            handler.addRecord(new Tuple3<Long, String, Byte>(92L, "peter pepper", (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
            // expected
        }

        assertTrue(memMan.verifyEmpty());
    } catch (Exception e) {
        // keep the original exception as cause instead of printing it and failing with
        // only its (possibly null) message
        throw new AssertionError("Unexpected exception in testRecordHandlerCompositeKey: " + e, e);
    } finally {
        ioMan.shutdown();
    }
}
use of org.apache.flink.runtime.io.disk.iomanager.IOManager in project flink by apache.
the class RocksDBStateBackendConfigTest method getMockEnvironment.
/**
 * Creates a Mockito-backed {@code Environment} for tests.
 *
 * <p>The mock answers {@code getJobID}, {@code getUserClassLoader}, {@code getIOManager},
 * {@code getTaskKvStateRegistry}, {@code getTaskInfo} and {@code getTaskManagerInfo}. The
 * mocked I/O manager reports {@code tempDirs} as its spilling directories, and the task
 * manager info is built from the absolute paths of those same directories.
 *
 * @param tempDirs directories to expose as spilling / temp directories
 * @return the stubbed environment mock
 */
static Environment getMockEnvironment(File[] tempDirs) {
    // absolute path of each temp directory, in input order
    final String[] tempDirPaths = new String[tempDirs.length];
    int pos = 0;
    for (File dir : tempDirs) {
        tempDirPaths[pos++] = dir.getAbsolutePath();
    }

    // collaborators handed out by the environment
    final TaskManagerRuntimeInfo runtimeInfo =
            new TestingTaskManagerRuntimeInfo(new Configuration(), tempDirPaths);

    final TaskInfo subtaskInfo = mock(TaskInfo.class);
    when(subtaskInfo.getIndexOfThisSubtask()).thenReturn(0);

    final IOManager ioManagerMock = mock(IOManager.class);
    when(ioManagerMock.getSpillingDirectories()).thenReturn(tempDirs);

    // the environment itself
    final Environment environment = mock(Environment.class);
    final JobID jobId = new JobID();
    when(environment.getJobID()).thenReturn(jobId);
    when(environment.getUserClassLoader())
            .thenReturn(RocksDBStateBackendConfigTest.class.getClassLoader());
    when(environment.getIOManager()).thenReturn(ioManagerMock);
    // the task registry is created with its own fresh job and vertex ids
    when(environment.getTaskKvStateRegistry())
            .thenReturn(new KvStateRegistry().createTaskRegistry(new JobID(), new JobVertexID()));
    when(environment.getTaskInfo()).thenReturn(subtaskInfo);
    when(environment.getTaskManagerInfo()).thenReturn(runtimeInfo);

    return environment;
}
Aggregations