Search in sources :

Example 1 with DistributedCacheEntry

use of org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry in project flink by apache.

the class FileCacheDeleteValidationTest method testFileReuseForNextTask.

/**
 * Checks the reference handling of the file cache when the same file is
 * requested several times for the same job.
 *
 * <p>The test requests the file twice, releases both references, and asserts
 * after each release that the cache still reports a reference. It then
 * requests and releases the file once more and finally polls until the cache
 * no longer reports a reference, or a 20 second deadline has passed.
 *
 * @throws Exception if creating the temp file or waiting for the cleanup fails;
 *         propagated to the test runner so the full stack trace is reported
 */
@Test
public void testFileReuseForNextTask() throws Exception {
    final JobID jobID = new JobID();
    final String fileName = "test_file";
    final String filePath = f.toURI().toString();
    // copy / create the file and wait until the copy has completed
    Future<Path> copyResult = fileCache.createTmpFile(fileName, new DistributedCacheEntry(filePath, false), jobID);
    copyResult.get();
    // get another reference to the file
    Future<Path> copyResult2 = fileCache.createTmpFile(fileName, new DistributedCacheEntry(filePath, false), jobID);
    // this should be available immediately
    assertTrue(copyResult2.isDone());
    // delete the first reference
    fileCache.deleteTmpFile(fileName, jobID);
    // file should not yet be deleted
    assertTrue(fileCache.holdsStillReference(fileName, jobID));
    // delete the second reference
    fileCache.deleteTmpFile(fileName, jobID);
    // file should still not be deleted, but remain for a bit
    assertTrue(fileCache.holdsStillReference(fileName, jobID));
    // request and release the file once more while it is still held
    fileCache.createTmpFile(fileName, new DistributedCacheEntry(filePath, false), jobID);
    fileCache.deleteTmpFile(fileName, jobID);
    // after a while, the file should disappear
    long deadline = System.currentTimeMillis() + 20000;
    do {
        Thread.sleep(5500);
    } while (fileCache.holdsStillReference(fileName, jobID) && System.currentTimeMillis() < deadline);
    assertFalse(fileCache.holdsStillReference(fileName, jobID));
}
Also used : Path(org.apache.flink.core.fs.Path) DistributedCacheEntry(org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry) JobID(org.apache.flink.api.common.JobID) IOException(java.io.IOException) Test(org.junit.Test)

Example 2 with DistributedCacheEntry

use of org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry in project flink by apache.

the class JobGraphGenerator method compileJobGraph.

/**
 * Translates the given optimized plan into a {@link JobGraph}.
 *
 * <p>The method resets the generator's working collections, traverses the plan
 * with this generator as the visitor, finalizes all collected iterations, adds
 * the chained tasks to the configuration of their containing vertices, and
 * then assembles the job graph from the generated vertices. Cached files
 * registered on the original plan are written into the job configuration.
 * The working collections are set to {@code null} again before returning.
 *
 * @param program the optimized plan to translate; must not be {@code null}
 * @param jobId the ID for the resulting job graph; if {@code null}, a new ID is generated
 * @return the job graph built from the plan
 * @throws NullPointerException if {@code program} is {@code null}
 * @throws CompilerException if the traversal ends inside an unclosed iteration, an
 *         iteration node has an unknown type, or the execution config cannot be serialized
 */
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
    if (program == null) {
        throw new NullPointerException("Program is null, did you called " + "ExecutionEnvironment.execute()");
    }
    if (jobId == null) {
        jobId = JobID.generate();
    }
    this.vertices = new HashMap<PlanNode, JobVertex>();
    this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
    this.chainedTasksInSequence = new ArrayList<TaskInChain>();
    this.auxVertices = new ArrayList<JobVertex>();
    this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
    this.iterationStack = new ArrayList<IterationPlanNode>();
    this.sharingGroup = new SlotSharingGroup();
    // this starts the traversal that generates the job graph
    program.accept(this);
    // sanity check that we are not somehow in an iteration at the end
    if (this.currentIteration != null) {
        throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
    }
    // finalize the iterations
    for (IterationDescriptor iteration : this.iterations.values()) {
        final Object iterationNode = iteration.getIterationNode();
        if (iterationNode instanceof BulkIterationPlanNode) {
            finalizeBulkIteration(iteration);
        } else if (iterationNode instanceof WorksetIterationPlanNode) {
            finalizeWorksetIteration(iteration);
        } else {
            // name the offending type so the failure can be diagnosed
            throw new CompilerException("Unknown iteration node type: "
                    + (iterationNode == null ? "null" : iterationNode.getClass().getName()));
        }
    }
    // now that the traversal is done, add the chained tasks to their
    // parents' configurations
    for (TaskInChain tic : this.chainedTasksInSequence) {
        TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
        t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
    }
    // ----- attach the additional info to the job vertices, for display in the runtime monitor
    attachOperatorNamesAndDescriptions();
    // ----------- finalize the job graph -----------
    // create the job graph object
    JobGraph graph = new JobGraph(jobId, program.getJobName());
    try {
        graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
    } catch (IOException e) {
        // keep the original exception as the cause so the failing type can be found
        throw new CompilerException("Could not serialize the ExecutionConfig. "
                + "This indicates that non-serializable types (like custom serializers) were registered", e);
    }
    graph.setAllowQueuedScheduling(false);
    graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());
    // add vertices to the graph
    for (JobVertex vertex : this.vertices.values()) {
        graph.addVertex(vertex);
    }
    // auxiliary vertices are additionally assigned to the shared slot sharing group
    for (JobVertex vertex : this.auxVertices) {
        graph.addVertex(vertex);
        vertex.setSlotSharingGroup(sharingGroup);
    }
    // add registered cache file into job configuration
    for (Entry<String, DistributedCacheEntry> e : program.getOriginalPlan().getCachedFiles()) {
        DistributedCache.writeFileInfoToConfig(e.getKey(), e.getValue(), graph.getJobConfiguration());
    }
    // release all references again
    this.vertices = null;
    this.chainedTasks = null;
    this.chainedTasksInSequence = null;
    this.auxVertices = null;
    this.iterations = null;
    this.iterationStack = null;
    // return job graph
    return graph;
}
Also used : WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) IOException(java.io.IOException) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DistributedCacheEntry(org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CompilerException(org.apache.flink.optimizer.CompilerException) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 3 with DistributedCacheEntry

use of org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry in project flink by apache.

the class Plan method registerCachedFile.

/**
 * Registers a cache file at the program level.
 *
 * <p>The file path of the entry is parsed as a URI. If the URI has no path or
 * its path does not start with {@code "/"}, the path is treated as a local
 * file and converted to a URI via {@link File#toURI()}. The entry is stored
 * under the given name only if the file exists in the file system resolved
 * for that URI.
 *
 * @param name user defined name of that file
 * @param entry contains all relevant information (file path and executable flag)
 * @throws java.io.IOException if a file is already registered under {@code name},
 *         if the file does not exist, or if the path is not a valid URI
 */
public void registerCachedFile(String name, DistributedCacheEntry entry) throws IOException {
    if (this.cacheFile.containsKey(name)) {
        throw new IOException("cache file " + name + " already exists!");
    }
    try {
        URI u = new URI(entry.filePath);
        // URI.getPath() is null for opaque URIs (e.g. "file:foo"); treat those
        // like relative paths instead of failing with a NullPointerException
        final String path = u.getPath();
        if (path == null || !path.startsWith("/")) {
            u = new File(entry.filePath).toURI();
        }
        FileSystem fs = FileSystem.get(u);
        if (!fs.exists(new Path(u.getPath()))) {
            throw new IOException("File " + u.toString() + " doesn't exist.");
        }
        this.cacheFile.put(name, new DistributedCacheEntry(u.toString(), entry.isExecutable));
    } catch (URISyntaxException ex) {
        throw new IOException("Invalid path: " + entry.filePath, ex);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) DistributedCacheEntry(org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry) FileSystem(org.apache.flink.core.fs.FileSystem) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) File(java.io.File)

Aggregations

IOException (java.io.IOException)3 DistributedCacheEntry (org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry)3 Path (org.apache.flink.core.fs.Path)2 File (java.io.File)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 JobID (org.apache.flink.api.common.JobID)1 FileSystem (org.apache.flink.core.fs.FileSystem)1 CompilerException (org.apache.flink.optimizer.CompilerException)1 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)1 BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode)1 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)1 IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)1 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)1 PlanNode (org.apache.flink.optimizer.plan.PlanNode)1 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)1 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)1 SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode)1 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)1 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)1