Search in sources :

Example 1 with FlowGraph

use of org.apache.gobblin.service.modules.flowgraph.FlowGraph in project incubator-gobblin by apache.

the class MultiHopFlowCompiler method compileFlow.

/**
 * Compiles a {@link FlowSpec} into a DAG of {@link JobExecutionPlan}s.
 *
 * <p>The source and destination node identifiers are read from the flow config and looked up in the
 * flow graph. The spec is then split via {@code splitFlowSpec}; for each resulting spec, data movement
 * from the source to every destination node is checked with the data movement authorizer, a path is
 * searched in the flow graph, and the path's DAG is merged into the result.
 *
 * <p>Failures are reported by adding a compilation error to a spec and returning {@code null}; they are
 * not signalled by an exception (apart from the argument precondition checks).
 *
 * @param spec an instance of {@link FlowSpec}; must be non-null.
 * @return A DAG of {@link JobExecutionPlan}s, which encapsulates the compiled {@link org.apache.gobblin.runtime.api.JobSpec}s
 * together with the {@link SpecExecutor} where the job can be executed, or {@code null} if a source or
 * destination node is missing from the flow graph, the data movement is not authorized, no path is
 * found, or an exception occurs during compilation.
 */
@Override
public Dag<JobExecutionPlan> compileFlow(Spec spec) {
    Preconditions.checkNotNull(spec);
    Preconditions.checkArgument(spec instanceof FlowSpec, "MultiHopFlowCompiler only accepts FlowSpecs");
    long startTime = System.nanoTime();
    FlowSpec flowSpec = (FlowSpec) spec;
    String source = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY, "");
    String destination = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY, "");
    DataNode sourceNode = this.flowGraph.getNode(source);
    if (sourceNode == null) {
        flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", source));
        return null;
    }
    // The destination key is read a second time as a list so that every listed node id is resolved.
    List<String> destNodeIds = ConfigUtils.getStringList(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
    List<DataNode> destNodes = destNodeIds.stream().map(this.flowGraph::getNode).collect(Collectors.toList());
    int missingDestIndex = destNodes.indexOf(null);
    if (missingDestIndex >= 0) {
        // Report the first destination id that could not be resolved to a node.
        flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", destNodeIds.get(missingDestIndex)));
        return null;
    }
    log.info("Compiling flow for source: {} and destination: {}", source, destination);
    List<FlowSpec> flowSpecs = splitFlowSpec(flowSpec);
    Dag<JobExecutionPlan> jobExecutionPlanDag = new Dag<>(new ArrayList<>());
    // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally clause must not
    // call unlock() on a lock this thread never held (that would throw IllegalMonitorStateException and
    // mask the original failure).
    this.rwLock.readLock().lock();
    try {
        for (FlowSpec datasetFlowSpec : flowSpecs) {
            for (DataNode destNode : destNodes) {
                long authStartTime = System.nanoTime();
                try {
                    // Authorization is evaluated against the original flowSpec, not the split datasetFlowSpec.
                    boolean authorized = this.dataMovementAuthorizer.isMovementAuthorized(flowSpec, sourceNode, destNode);
                    Instrumented.updateTimer(dataAuthorizationTimer, System.nanoTime() - authStartTime, TimeUnit.NANOSECONDS);
                    if (!authorized) {
                        String message = String.format("Data movement is not authorized for flow: %s, source: %s, destination: %s", flowSpec.getUri().toString(), source, destination);
                        log.error(message);
                        // NOTE(review): the error is recorded on datasetFlowSpec, and unlike the other failure
                        // paths this one does not mark flowCompilationFailedMeter -- confirm both are intended.
                        datasetFlowSpec.addCompilationError(source, destination, message);
                        return null;
                    }
                } catch (Exception e) {
                    Instrumented.markMeter(flowCompilationFailedMeter);
                    datasetFlowSpec.addCompilationError(source, destination, Throwables.getStackTraceAsString(e));
                    return null;
                }
            }
            // Compute the path from source to destination.
            FlowGraphPath flowGraphPath = flowGraph.findPath(datasetFlowSpec);
            if (flowGraphPath != null) {
                // Convert the path into a Dag of JobExecutionPlans.
                jobExecutionPlanDag = jobExecutionPlanDag.merge(flowGraphPath.asDag(this.config));
            }
        }
        if (jobExecutionPlanDag.isEmpty()) {
            Instrumented.markMeter(flowCompilationFailedMeter);
            String message = String.format("No path found from source: %s and destination: %s", source, destination);
            log.info(message);
            // Only add the generic "no path" error when no priority-0 error has already been recorded.
            if (flowSpec.getCompilationErrors().stream().noneMatch(compilationError -> compilationError.errorPriority == 0)) {
                flowSpec.addCompilationError(source, destination, message);
            }
            return null;
        }
    } catch (PathFinder.PathFinderException | SpecNotFoundException | JobTemplate.TemplateException | URISyntaxException | ReflectiveOperationException e) {
        Instrumented.markMeter(flowCompilationFailedMeter);
        String message = String.format("Exception encountered while compiling flow for source: %s and destination: %s, %s", source, destination, Throwables.getStackTraceAsString(e));
        log.error(message, e);
        flowSpec.addCompilationError(source, destination, message);
        return null;
    } finally {
        this.rwLock.readLock().unlock();
    }
    Instrumented.markMeter(flowCompilationSuccessFulMeter);
    Instrumented.updateTimer(flowCompilationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
    return jobExecutionPlanDag;
}
Also used : DatasetDescriptorConfigKeys(org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys) Getter(lombok.Getter) ObservingFSFlowEdgeTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.ObservingFSFlowEdgeTemplateCatalog) ServiceManager(com.google.common.util.concurrent.ServiceManager) URISyntaxException(java.net.URISyntaxException) TimeoutException(java.util.concurrent.TimeoutException) ConfigValueFactory(com.typesafe.config.ConfigValueFactory) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ConfigUtils(org.apache.gobblin.util.ConfigUtils) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) PathFinder(org.apache.gobblin.service.modules.flowgraph.pathfinder.PathFinder) Lists(com.google.common.collect.Lists) Optional(com.google.common.base.Optional) Path(org.apache.hadoop.fs.Path) JobTemplate(org.apache.gobblin.runtime.api.JobTemplate) BaseFlowGraph(org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph) ServiceConfigKeys(org.apache.gobblin.service.ServiceConfigKeys) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) Spec(org.apache.gobblin.runtime.api.Spec) Logger(org.slf4j.Logger) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) Config(com.typesafe.config.Config) ClassAliasResolver(org.apache.gobblin.util.ClassAliasResolver) Instrumented(org.apache.gobblin.instrumented.Instrumented) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) Collectors(java.util.stream.Collectors) InvocationTargetException(java.lang.reflect.InvocationTargetException) TimeUnit(java.util.concurrent.TimeUnit) Alpha(org.apache.gobblin.annotation.Alpha) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) DataNode(org.apache.gobblin.service.modules.flowgraph.DataNode) 
CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) FlowGraph(org.apache.gobblin.service.modules.flowgraph.FlowGraph) Preconditions(com.google.common.base.Preconditions) ConstructorUtils(org.apache.commons.lang3.reflect.ConstructorUtils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Joiner(com.google.common.base.Joiner) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) GitFlowGraphMonitor(org.apache.gobblin.service.modules.core.GitFlowGraphMonitor) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) URISyntaxException(java.net.URISyntaxException) URISyntaxException(java.net.URISyntaxException) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) InvocationTargetException(java.lang.reflect.InvocationTargetException) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) DataNode(org.apache.gobblin.service.modules.flowgraph.DataNode) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Joiner (com.google.common.base.Joiner)1 Optional (com.google.common.base.Optional)1 Preconditions (com.google.common.base.Preconditions)1 Throwables (com.google.common.base.Throwables)1 Lists (com.google.common.collect.Lists)1 ServiceManager (com.google.common.util.concurrent.ServiceManager)1 Config (com.typesafe.config.Config)1 ConfigValueFactory (com.typesafe.config.ConfigValueFactory)1 IOException (java.io.IOException)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 TimeUnit (java.util.concurrent.TimeUnit)1 TimeoutException (java.util.concurrent.TimeoutException)1 ReadWriteLock (java.util.concurrent.locks.ReadWriteLock)1 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)1 Collectors (java.util.stream.Collectors)1