Example 11 with MemoryManager

Use of org.apache.flink.runtime.memory.MemoryManager in project flink by apache.

From the class TaskExecutor, method submitTask.

// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@Override
public CompletableFuture<Acknowledge> submitTask(TaskDeploymentDescriptor tdd, JobMasterId jobMasterId, Time timeout) {
    try {
        final JobID jobId = tdd.getJobId();
        final ExecutionAttemptID executionAttemptID = tdd.getExecutionAttemptId();
        final JobTable.Connection jobManagerConnection = jobTable.getConnection(jobId).orElseThrow(() -> {
            final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
            log.debug(message);
            return new TaskSubmissionException(message);
        });
        if (!Objects.equals(jobManagerConnection.getJobMasterId(), jobMasterId)) {
            final String message = "Rejecting the task submission because the job manager leader id " + jobMasterId + " does not match the expected job manager leader id " + jobManagerConnection.getJobMasterId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
        if (!taskSlotTable.tryMarkSlotActive(jobId, tdd.getAllocationId())) {
            final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
        // re-integrate offloaded data:
        try {
            tdd.loadBigData(taskExecutorBlobService.getPermanentBlobService());
        } catch (IOException | ClassNotFoundException e) {
            throw new TaskSubmissionException("Could not re-integrate offloaded TaskDeploymentDescriptor data.", e);
        }
        // deserialize the pre-serialized information
        final JobInformation jobInformation;
        final TaskInformation taskInformation;
        try {
            jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
            taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
        } catch (IOException | ClassNotFoundException e) {
            throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
        }
        if (!jobId.equals(jobInformation.getJobId())) {
            throw new TaskSubmissionException("Inconsistent job ID information inside TaskDeploymentDescriptor (" + tdd.getJobId() + " vs. " + jobInformation.getJobId() + ")");
        }
        TaskManagerJobMetricGroup jobGroup = taskManagerMetricGroup.addJob(jobInformation.getJobId(), jobInformation.getJobName());
        // note that a pre-existing job group can NOT be closed concurrently - this is done by
        // the same TM thread in removeJobMetricsGroup
        TaskMetricGroup taskMetricGroup = jobGroup.addTask(taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
        InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getJobManagerGateway(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getRpcTimeout());
        final TaskOperatorEventGateway taskOperatorEventGateway = new RpcTaskOperatorEventGateway(jobManagerConnection.getJobManagerGateway(), executionAttemptID, (t) -> runAsync(() -> failTask(executionAttemptID, t)));
        TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
        CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
        GlobalAggregateManager aggregateManager = jobManagerConnection.getGlobalAggregateManager();
        LibraryCacheManager.ClassLoaderHandle classLoaderHandle = jobManagerConnection.getClassLoaderHandle();
        ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
        PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
        final TaskLocalStateStore localStateStore = localStateStoresManager.localStateStoreForSubtask(jobId, tdd.getAllocationId(), taskInformation.getJobVertexId(), tdd.getSubtaskIndex());
        // TODO: Pass config value from user program and do overriding here.
        final StateChangelogStorage<?> changelogStorage;
        try {
            changelogStorage = changelogStoragesManager.stateChangelogStorageForJob(jobId, taskManagerConfiguration.getConfiguration(), jobGroup);
        } catch (IOException e) {
            throw new TaskSubmissionException(e);
        }
        final JobManagerTaskRestore taskRestore = tdd.getTaskRestore();
        final TaskStateManager taskStateManager = new TaskStateManagerImpl(jobId, tdd.getExecutionAttemptId(), localStateStore, changelogStorage, taskRestore, checkpointResponder);
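        // note: the MemoryManager is slot-scoped, so all tasks deployed into the same slot share it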
        MemoryManager memoryManager;
        try {
            memoryManager = taskSlotTable.getTaskMemoryManager(tdd.getAllocationId());
        } catch (SlotNotFoundException e) {
            throw new TaskSubmissionException("Could not submit task.", e);
        }
        Task task = new Task(
                jobInformation,
                taskInformation,
                tdd.getExecutionAttemptId(),
                tdd.getAllocationId(),
                tdd.getSubtaskIndex(),
                tdd.getAttemptNumber(),
                tdd.getProducedPartitions(),
                tdd.getInputGates(),
                memoryManager,
                taskExecutorServices.getIOManager(),
                taskExecutorServices.getShuffleEnvironment(),
                taskExecutorServices.getKvStateService(),
                taskExecutorServices.getBroadcastVariableManager(),
                taskExecutorServices.getTaskEventDispatcher(),
                externalResourceInfoProvider,
                taskStateManager,
                taskManagerActions,
                inputSplitProvider,
                checkpointResponder,
                taskOperatorEventGateway,
                aggregateManager,
                classLoaderHandle,
                fileCache,
                taskManagerConfiguration,
                taskMetricGroup,
                resultPartitionConsumableNotifier,
                partitionStateChecker,
                getRpcService().getScheduledExecutor());
        taskMetricGroup.gauge(MetricNames.IS_BACK_PRESSURED, task::isBackPressured);
        log.info("Received task {} ({}), deploy into slot with allocation id {}.", task.getTaskInfo().getTaskNameWithSubtasks(), tdd.getExecutionAttemptId(), tdd.getAllocationId());
        boolean taskAdded;
        try {
            taskAdded = taskSlotTable.addTask(task);
        } catch (SlotNotFoundException | SlotNotActiveException e) {
            throw new TaskSubmissionException("Could not submit task.", e);
        }
        if (taskAdded) {
            task.startTaskThread();
            setupResultPartitionBookkeeping(tdd.getJobId(), tdd.getProducedPartitions(), task.getTerminationFuture());
            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
    } catch (TaskSubmissionException e) {
        return FutureUtils.completedExceptionally(e);
    }
}
Also used:
SlotNotFoundException (org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException)
TaskStateManagerImpl (org.apache.flink.runtime.state.TaskStateManagerImpl)
Task (org.apache.flink.runtime.taskmanager.Task)
SlotNotActiveException (org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException)
RpcInputSplitProvider (org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider)
RpcTaskOperatorEventGateway (org.apache.flink.runtime.taskexecutor.rpc.RpcTaskOperatorEventGateway)
TaskOperatorEventGateway (org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway)
JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)
TaskManagerActions (org.apache.flink.runtime.taskmanager.TaskManagerActions)
TaskSubmissionException (org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException)
InputSplitProvider (org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider)
ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier)
RpcResultPartitionConsumableNotifier (org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier)
JobInformation (org.apache.flink.runtime.executiongraph.JobInformation)
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)
TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation)
TaskLocalStateStore (org.apache.flink.runtime.state.TaskLocalStateStore)
TaskMetricGroup (org.apache.flink.runtime.metrics.groups.TaskMetricGroup)
RpcCheckpointResponder (org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder)
CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)
TaskManagerJobMetricGroup (org.apache.flink.runtime.metrics.groups.TaskManagerJobMetricGroup)
IOException (java.io.IOException)
LibraryCacheManager (org.apache.flink.runtime.execution.librarycache.LibraryCacheManager)
TaskStateManager (org.apache.flink.runtime.state.TaskStateManager)
MemoryManager (org.apache.flink.runtime.memory.MemoryManager)
RpcGlobalAggregateManager (org.apache.flink.runtime.taskexecutor.rpc.RpcGlobalAggregateManager)
JobID (org.apache.flink.api.common.JobID)
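The submitTask method above only looks up the slot's existing MemoryManager; the test examples that follow build one directly. For reference, here is a minimal, self-contained sketch of the allocate/release lifecycle those tests exercise, assuming the test-scope MemoryManagerBuilder and DummyInvokable utilities shown in the examples are on the classpath:

import java.util.ArrayList;
import java.util.List;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.memory.MemoryManager;
import org.apache.flink.runtime.memory.MemoryManagerBuilder;
import org.apache.flink.runtime.operators.testutils.DummyInvokable;

public class MemoryManagerLifecycleSketch {
    public static void main(String[] args) throws Exception {
        final int pageSize = 32 * 1024;
        MemoryManager memMan = MemoryManagerBuilder.newBuilder()
                .setMemorySize(8 * pageSize)  // total managed memory
                .setPageSize(pageSize)        // size of a single MemorySegment
                .build();
        DummyInvokable owner = new DummyInvokable();  // every allocation is tied to an owner
        List<MemorySegment> segments = new ArrayList<>();
        memMan.allocatePages(owner, segments, 4);     // borrow 4 pages for the owner
        // ... use the segments ...
        memMan.releaseAll(owner);                     // return everything the owner holds
        if (!memMan.verifyEmpty()) {                  // the invariant the tests assert
            throw new IllegalStateException("managed memory was leaked");
        }
        memMan.shutdown();
    }
}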

Example 12 with MemoryManager

Use of org.apache.flink.runtime.memory.MemoryManager in project flink by apache.

From the class FileChannelStreamsTest, method testCloseAndDeleteInputView.

@Test
public void testCloseAndDeleteInputView() {
    try (IOManager ioManager = new IOManagerAsync()) {
        MemoryManager memMan = MemoryManagerBuilder.newBuilder().build();
        List<MemorySegment> memory = new ArrayList<MemorySegment>();
        memMan.allocatePages(new DummyInvokable(), memory, 4);
        FileIOChannel.ID channel = ioManager.createChannel();
        // add some test data
        try (FileWriter wrt = new FileWriter(channel.getPath())) {
            wrt.write("test data");
        }
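        // the last constructor argument (9) is the size of the file's final block in bytes,
        // matching the length of the "test data" string written above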
        BlockChannelReader<MemorySegment> reader = ioManager.createBlockChannelReader(channel);
        FileChannelInputView in = new FileChannelInputView(reader, memMan, memory, 9);
        // read just something
        in.readInt();
        // close for the first time, make sure all memory returns
        in.close();
        assertTrue(memMan.verifyEmpty());
        // close again, should not cause an exception
        in.close();
        // delete, make sure file is removed
        in.closeAndDelete();
        assertFalse(new File(channel.getPath()).exists());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used:
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)
FileWriter (java.io.FileWriter)
ArrayList (java.util.ArrayList)
FileIOChannel (org.apache.flink.runtime.io.disk.iomanager.FileIOChannel)
MemoryManager (org.apache.flink.runtime.memory.MemoryManager)
MemorySegment (org.apache.flink.core.memory.MemorySegment)
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)
File (java.io.File)
Test (org.junit.Test)
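Two properties of close() carry this test: it returns all managed segments (checked via verifyEmpty()), and it is safe to call twice. A hypothetical sketch of that idempotent-close contract, with illustrative names rather than the actual Flink implementation:

// Hypothetical sketch of the idempotent-close contract the test relies on.
final class PagedView implements java.io.Closeable {
    private final MemoryManager memoryManager;
    private final List<MemorySegment> segments;
    private boolean closed;

    PagedView(MemoryManager memoryManager, List<MemorySegment> segments) {
        this.memoryManager = memoryManager;
        this.segments = segments;
    }

    @Override
    public void close() {
        if (closed) {
            return;  // second close is a no-op
        }
        closed = true;
        memoryManager.release(segments);  // hand every page back on first close
    }
}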

Example 13 with MemoryManager

Use of org.apache.flink.runtime.memory.MemoryManager in project flink by apache.

From the class SeekableFileChannelInputViewTest, method testSeek.

@Test
public void testSeek() {
    final int PAGE_SIZE = 16 * 1024;
    final int NUM_RECORDS = 120000;
    try (IOManager ioManager = new IOManagerAsync()) {
        MemoryManager memMan = MemoryManagerBuilder.newBuilder().setMemorySize(4 * PAGE_SIZE).setPageSize(PAGE_SIZE).build();
        List<MemorySegment> memory = new ArrayList<MemorySegment>();
        memMan.allocatePages(new DummyInvokable(), memory, 4);
        FileIOChannel.ID channel = ioManager.createChannel();
        BlockChannelWriter<MemorySegment> writer = ioManager.createBlockChannelWriter(channel);
        FileChannelOutputView out = new FileChannelOutputView(writer, memMan, memory, memMan.getPageSize());
        // write NUM_RECORDS bytes of integer data (30,000 ints, spanning a bit more than 7 pages of 16 KiB)
        for (int i = 0; i < NUM_RECORDS; i += 4) {
            out.writeInt(i);
        }
        // close for the first time, make sure all memory returns
        out.close();
        assertTrue(memMan.verifyEmpty());
        memMan.allocatePages(new DummyInvokable(), memory, 4);
        SeekableFileChannelInputView in = new SeekableFileChannelInputView(ioManager, channel, memMan, memory, out.getBytesInLatestSegment());
        // read first, complete
        for (int i = 0; i < NUM_RECORDS; i += 4) {
            assertEquals(i, in.readInt());
        }
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek to the middle of the 3rd page
        int i = 2 * PAGE_SIZE + PAGE_SIZE / 4;
        in.seek(i);
        for (; i < NUM_RECORDS; i += 4) {
            assertEquals(i, in.readInt());
        }
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek to the last integer
        i = NUM_RECORDS - 4;
        in.seek(i);
        for (; i < NUM_RECORDS; i += 4) {
            assertEquals(i, in.readInt());
        }
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek to the beginning
        i = 0;
        in.seek(i);
        for (; i < NUM_RECORDS; i += 4) {
            assertEquals(i, in.readInt());
        }
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek to the start of the second page
        i = PAGE_SIZE;
        in.seek(i);
        for (; i < NUM_RECORDS; i += 4) {
            assertEquals(i, in.readInt());
        }
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek to the start of the fourth page
        i = 3 * PAGE_SIZE;
        in.seek(i);
        for (; i < NUM_RECORDS; i += 4) {
            assertEquals(i, in.readInt());
        }
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek to the end
        i = NUM_RECORDS;
        in.seek(i);
        try {
            in.readInt();
            fail("should throw EOF exception");
        } catch (EOFException ignored) {
        }
        // seek out of bounds
        try {
            in.seek(-10);
            fail("should throw an exception");
        } catch (IllegalArgumentException ignored) {
        }
        try {
            in.seek(NUM_RECORDS + 1);
            fail("should throw an exception");
        } catch (IllegalArgumentException ignored) {
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used:
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)
ArrayList (java.util.ArrayList)
FileIOChannel (org.apache.flink.runtime.io.disk.iomanager.FileIOChannel)
MemoryManager (org.apache.flink.runtime.memory.MemoryManager)
MemorySegment (org.apache.flink.core.memory.MemorySegment)
EOFException (java.io.EOFException)
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)
Test (org.junit.Test)
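Every seek position exercised above reduces to the same page arithmetic: a byte position splits into the index of the page that holds it and an offset within that page. A hypothetical helper, illustrative only and not part of the Flink API, showing the mapping:

// Illustrative paging arithmetic for a seekable, paged input view.
final class PageMath {
    static int pageIndex(long position, int pageSize) {
        return (int) (position / pageSize);  // which page holds the byte
    }

    static int pageOffset(long position, int pageSize) {
        return (int) (position % pageSize);  // where inside that page it sits
    }
}

For instance, the test's seek to 2 * PAGE_SIZE + PAGE_SIZE / 4 lands on page index 2 at offset PAGE_SIZE / 4.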

Example 14 with MemoryManager

Use of org.apache.flink.runtime.memory.MemoryManager in project flink by apache.

From the class LargeRecordHandlerTest, method testRecordHandlerCompositeKey.

@Test
public void testRecordHandlerCompositeKey() {
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 24;
    final int NUM_RECORDS = 25000;
    try (final IOManager ioMan = new IOManagerAsync()) {
        final MemoryManager memMan = MemoryManagerBuilder.newBuilder().setMemorySize(NUM_PAGES * PAGE_SIZE).setPageSize(PAGE_SIZE).build();
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> initialMemory = memMan.allocatePages(owner, 6);
        final List<MemorySegment> sortMemory = memMan.allocatePages(owner, NUM_PAGES - 6);
        final TupleTypeInfo<Tuple3<Long, String, Byte>> typeInfo = (TupleTypeInfo<Tuple3<Long, String, Byte>>) TypeInformation.of(new TypeHint<Tuple3<Long, String, Byte>>() {
        });
        final TypeSerializer<Tuple3<Long, String, Byte>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        final TypeComparator<Tuple3<Long, String, Byte>> comparator = typeInfo.createComparator(new int[] { 2, 0 }, new boolean[] { true, true }, 0, new ExecutionConfig());
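        // composite key: sort by field 2 (the Byte) first, then by field 0 (the Long), both ascending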
        LargeRecordHandler<Tuple3<Long, String, Byte>> handler = new LargeRecordHandler<Tuple3<Long, String, Byte>>(serializer, comparator, ioMan, memMan, initialMemory, owner, 128, owner.getExecutionConfig());
        assertFalse(handler.hasData());
        // add the test data
        Random rnd = new Random();
        for (int i = 0; i < NUM_RECORDS; i++) {
            long val = rnd.nextLong();
            handler.addRecord(new Tuple3<Long, String, Byte>(val, String.valueOf(val), (byte) val));
            assertTrue(handler.hasData());
        }
        MutableObjectIterator<Tuple3<Long, String, Byte>> sorted = handler.finishWriteAndSortKeys(sortMemory);
        try {
            handler.addRecord(new Tuple3<Long, String, Byte>(92L, "peter pepper", (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
        // expected
        }
        Tuple3<Long, String, Byte> previous = null;
        Tuple3<Long, String, Byte> next;
        while ((next = sorted.next(null)) != null) {
            // key and value must be equal
            assertTrue(next.f0.equals(Long.parseLong(next.f1)));
            assertTrue(next.f0.byteValue() == next.f2);
            // order must be correct
            if (previous != null) {
                assertTrue(previous.f2 <= next.f2);
                assertTrue(previous.f2.byteValue() != next.f2.byteValue() || previous.f0 <= next.f0);
            }
            previous = next;
        }
        handler.close();
        assertFalse(handler.hasData());
        handler.close();
        try {
            handler.addRecord(new Tuple3<Long, String, Byte>(92L, "peter pepper", (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
        // expected
        }
        assertTrue(memMan.verifyEmpty());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used:
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)
AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable)
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)
Random (java.util.Random)
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)
MemoryManager (org.apache.flink.runtime.memory.MemoryManager)
MemorySegment (org.apache.flink.core.memory.MemorySegment)
TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)
Tuple3 (org.apache.flink.api.java.tuple.Tuple3)
Test (org.junit.Test)
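The read loop above passes null to sorted.next(...), forcing the iterator to create a fresh record each time. Callers that want to avoid per-record allocation pass a reuse object instead; a minimal sketch of that pattern, using the same types as the test (the iterator may or may not actually reuse the object, so the returned reference must always be used):

Tuple3<Long, String, Byte> reuse = new Tuple3<>();
Tuple3<Long, String, Byte> record;
while ((record = sorted.next(reuse)) != null) {
    // process 'record'; it may be the same instance as 'reuse', refilled in place
    reuse = record;
}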

Example 15 with MemoryManager

Use of org.apache.flink.runtime.memory.MemoryManager in project flink by apache.

From the class BinaryOperatorTestBase, method shutdownAll.

// --------------------------------------------------------------------------------------------
@After
public void shutdownAll() throws Exception {
    // 1st, shutdown sorters
    for (Sorter<?> sorter : this.sorters) {
        if (sorter != null) {
            sorter.close();
        }
    }
    this.sorters.clear();
    // 2nd, shutdown I/O
    this.ioManager.close();
    // last, verify all memory is returned and shutdown mem manager
    MemoryManager memMan = getMemoryManager();
    if (memMan != null) {
        Assert.assertTrue("Memory Manager managed memory was not completely freed.", memMan.verifyEmpty());
        memMan.shutdown();
    }
}
Also used:
MemoryManager (org.apache.flink.runtime.memory.MemoryManager)
After (org.junit.After)

Aggregations

MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 69 uses
Test (org.junit.Test): 37 uses
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 22 uses
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 21 uses
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync): 18 uses
IOException (java.io.IOException): 16 uses
ArrayList (java.util.ArrayList): 14 uses
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable): 14 uses
UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator): 14 uses
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 13 uses
MemorySegment (org.apache.flink.core.memory.MemorySegment): 12 uses
Configuration (org.apache.flink.configuration.Configuration): 9 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 8 uses
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 7 uses
TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 7 uses
File (java.io.File): 6 uses
MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 6 uses
Map (java.util.Map): 5 uses
AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable): 5 uses
BufferedReader (java.io.BufferedReader): 4 uses