
Example 6 with TransactionPolicy

Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.

From the class ConnectionHandler, the method spec:

/**
 * Retrieve the spec for the connector, which can be used in a source/sink
 */
@POST
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}/specification")
public void spec(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("context") String namespace, @PathParam("connection") String connection) {
    respond(namespace, responder, namespaceSummary -> {
        if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Generating connector spec in system namespace is currently not supported");
            return;
        }
        contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, ConnectionId.getConnectionId(connection)), StandardPermission.USE);
        String specGenerationRequestString = StandardCharsets.UTF_8.decode(request.getContent()).toString();
        SpecGenerationRequest specRequest = GSON.fromJson(specGenerationRequestString, SpecGenerationRequest.class);
        if (specRequest == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "The request body is empty");
            return;
        }
        if (specRequest.getPath() == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Path is not provided in the sample request");
            return;
        }
        Connection conn = store.getConnection(new ConnectionId(namespaceSummary, connection));
        if (getContext().isRemoteTaskEnabled()) {
            executeRemotely(namespace, specGenerationRequestString, conn, RemoteConnectionSpecTask.class, responder);
        } else {
            specGenerationLocally(namespaceSummary.getName(), specRequest, conn, responder);
        }
        Metrics child = metrics.child(ImmutableMap.of(Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME, Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, conn.getConnectionType()));
        child.count(Constants.Metrics.Connection.CONNECTION_SPEC_COUNT, 1);
    });
}
Also used: ConnectionId (io.cdap.cdap.etl.proto.connection.ConnectionId), Metrics (io.cdap.cdap.api.metrics.Metrics), HttpURLConnection (java.net.HttpURLConnection), Connection (io.cdap.cdap.etl.proto.connection.Connection), SpecGenerationRequest (io.cdap.cdap.etl.proto.connection.SpecGenerationRequest), ConnectionEntityId (io.cdap.cdap.proto.id.ConnectionEntityId), Path (javax.ws.rs.Path), POST (javax.ws.rs.POST), TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy)
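In all of these handler examples, TransactionControl.EXPLICIT tells the CDAP framework not to wrap the method in an implicit transaction; the handler decides if and when transactional work happens. Below is a minimal sketch of that pattern, assuming the usual CDAP service and javax.ws.rs imports; the endpoint, dataset name, and handler method are hypothetical, not part of ConnectionHandler:

@POST
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path("/echo")
public void echo(HttpServiceRequest request, HttpServiceResponder responder) {
    // with EXPLICIT control, parsing and validation run outside any transaction
    String body = StandardCharsets.UTF_8.decode(request.getContent()).toString();
    try {
        // transactional work is scoped explicitly and kept as short as possible
        getContext().execute(datasetContext -> {
            // hypothetical dataset, used only for illustration
            KeyValueTable table = datasetContext.getDataset("echoes");
            table.write("last", body);
        });
    } catch (TransactionFailureException e) {
        responder.sendError(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
        return;
    }
    responder.sendString(body);
}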

Example 7 with TransactionPolicy

Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.

From the class ETLSpark, the method initialize:

@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
    SparkClientContext context = getContext();
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.speculation", "false");
    // turn off auto-broadcast by default until we better understand the implications and can set this to a
    // value that we are confident is safe.
    sparkConf.set("spark.sql.autoBroadcastJoinThreshold", "-1");
    sparkConf.set("spark.maxRemoteBlockSizeFetchToMem", String.valueOf(Integer.MAX_VALUE - 512));
    sparkConf.set("spark.network.timeout", "600s");
    // Disable yarn app retries since spark already performs retries at a task level.
    sparkConf.set("spark.yarn.maxAppAttempts", "1");
    // to make sure fields that are the same but different casing are treated as different fields in auto-joins
    // see CDAP-17024
    sparkConf.set("spark.sql.caseSensitive", "true");
    context.setSparkConf(sparkConf);
    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
        sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
    }
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context);
    MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace());
    SparkPreparer preparer = new SparkPreparer(context, context.getMetrics(), evaluator, pipelineRuntime);
    List<Finisher> finishers = preparer.prepare(phaseSpec);
    finisher = new CompositeFinisher(finishers);
}
Also used: PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime), DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator), MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator), SparkClientContext (io.cdap.cdap.api.spark.SparkClientContext), CompositeFinisher (io.cdap.cdap.etl.common.submit.CompositeFinisher), Finisher (io.cdap.cdap.etl.common.submit.Finisher), BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec), SparkConf (org.apache.spark.SparkConf), HashMap (java.util.HashMap), Map (java.util.Map), TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy)
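Note why initialize opts out of the implicit transaction here: by default CDAP runs program lifecycle methods inside a transaction, which imposes a transaction timeout, while preparing a pipeline instantiates plugins and may perform slow or remote work. With TransactionControl.EXPLICIT the preparation is not bound by that timeout, and any state access that does need a transaction can be performed explicitly through the context. The same reasoning applies to the MapReduce variant in Example 8 below.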

Example 8 with TransactionPolicy

Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.

From the class ETLMapReduce, the method initialize:

@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
    MapReduceContext context = getContext();
    Map<String, String> properties = context.getSpecification().getProperties();
    if (Boolean.valueOf(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context, mrMetrics);
    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
        hConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
    }
    Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
    // more than one reducer should never happen if the planner is correct
    if (reducers.size() > 1) {
        Iterator<StageSpec> reducerIter = reducers.iterator();
        StringBuilder reducersStr = new StringBuilder(reducerIter.next().getName());
        while (reducerIter.hasNext()) {
            reducersStr.append(",");
            reducersStr.append(reducerIter.next().getName());
        }
        throw new IllegalStateException("Found multiple reducers (" + reducersStr + ") in the same pipeline phase. "
            + "This means there was a bug in planning the pipeline when it was deployed.");
    }
    job.setMapperClass(ETLMapper.class);
    if (reducers.isEmpty()) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(ETLReducer.class);
    }
    // instantiate plugins and call their prepare methods
    Set<String> connectorDatasets = GSON.fromJson(properties.get(Constants.CONNECTOR_DATASETS), CONNECTOR_DATASETS_TYPE);
    MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace());
    MapReducePreparer preparer = new MapReducePreparer(context, mrMetrics, evaluator, pipelineRuntime, connectorDatasets);
    List<Finisher> finishers = preparer.prepare(phaseSpec, job);
    finisher = new CompositeFinisher(finishers);
}
Also used: PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime), DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator), MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator), Configuration (org.apache.hadoop.conf.Configuration), CompositeFinisher (io.cdap.cdap.etl.common.submit.CompositeFinisher), MapReduceContext (io.cdap.cdap.api.mapreduce.MapReduceContext), Finisher (io.cdap.cdap.etl.common.submit.Finisher), StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec), BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec), Job (org.apache.hadoop.mapreduce.Job), HashMap (java.util.HashMap), Map (java.util.Map), TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy)
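The multiple-reducers check reflects a MapReduce constraint: a job has at most one reduce stage, so at most one aggregator or joiner can run per pipeline phase, and the planner is expected to have split any additional ones into separate phases before this method ever runs.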

Example 9 with TransactionPolicy

Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.

From the class DataStreamsSparkLauncher, the method initialize:

@TransactionPolicy(TransactionControl.EXPLICIT)
@Override
public void initialize() throws Exception {
    SparkClientContext context = getContext();
    String arguments = Joiner.on(", ").withKeyValueSeparator("=").join(context.getRuntimeArguments());
    WRAPPERLOGGER.info("Pipeline '{}' is started by user '{}' with arguments {}", context.getApplicationSpecification().getName(), UserGroupInformation.getCurrentUser().getShortUserName(), arguments);
    DataStreamsPipelineSpec spec = GSON.fromJson(context.getSpecification().getProperty(Constants.PIPELINEID), DataStreamsPipelineSpec.class);
    PipelinePluginContext pluginContext = new SparkPipelinePluginContext(context, context.getMetrics(), true, true);
    int numSources = 0;
    for (StageSpec stageSpec : spec.getStages()) {
        if (StreamingSource.PLUGIN_TYPE.equals(stageSpec.getPlugin().getType())) {
            StreamingSource<Object> streamingSource = pluginContext.newPluginInstance(stageSpec.getName());
            numSources = numSources + streamingSource.getRequiredExecutors();
        }
    }
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.streaming.backpressure.enabled", "true");
    sparkConf.set("spark.spark.streaming.blockInterval", String.valueOf(spec.getBatchIntervalMillis() / 5));
    sparkConf.set("spark.maxRemoteBlockSizeFetchToMem", String.valueOf(Integer.MAX_VALUE - 512));
    String extraOpts = spec.getExtraJavaOpts();
    if (extraOpts != null && !extraOpts.isEmpty()) {
        sparkConf.set("spark.driver.extraJavaOptions", extraOpts);
        sparkConf.set("spark.executor.extraJavaOptions", extraOpts);
    }
    // Spark holds one thread per receiver (one core per receiver in distributed mode),
    // so the master and executor settings must provide at least as many cores as there
    // are streaming sources; the local master is sized accordingly, with a smaller
    // value when running as a unit test.
    // Without the extra dispatcher threads, stopping will hang on machines with few cores.
    sparkConf.set("spark.rpc.netty.dispatcher.numThreads", String.valueOf(numSources + 2));
    sparkConf.setMaster(String.format("local[%d]", numSources + 2));
    sparkConf.set("spark.executor.instances", String.valueOf(numSources + 2));
    if (spec.isUnitTest()) {
        sparkConf.setMaster(String.format("local[%d]", numSources + 1));
    }
    // override defaults with any user provided engine configs
    int minExecutors = numSources + 1;
    for (Map.Entry<String, String> property : spec.getProperties().entrySet()) {
        if ("spark.executor.instances".equals(property.getKey())) {
            // don't let the user set this to something that doesn't make sense
            try {
                int numExecutors = Integer.parseInt(property.getValue());
                if (numExecutors < minExecutors) {
                    LOG.warn("Number of executors {} is less than the minimum number required to run the pipeline. " + "Automatically increasing it to {}", numExecutors, minExecutors);
                    numExecutors = minExecutors;
                }
                sparkConf.set(property.getKey(), String.valueOf(numExecutors));
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("Number of spark executors was set to invalid value " + property.getValue(), e);
            }
        } else {
            sparkConf.set(property.getKey(), property.getValue());
        }
    }
    context.setSparkConf(sparkConf);
    WRAPPERLOGGER.info("Pipeline '{}' running", context.getApplicationSpecification().getName());
}
Also used: SparkClientContext (io.cdap.cdap.api.spark.SparkClientContext), SparkPipelinePluginContext (io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext), StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec), SparkConf (org.apache.spark.SparkConf), HashMap (java.util.HashMap), Map (java.util.Map), PipelinePluginContext (io.cdap.cdap.etl.common.plugin.PipelinePluginContext), TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy)
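The override loop at the end of this example is a pattern worth copying: engine defaults are written first, user-supplied properties are layered on top, and safety-critical values are clamped rather than trusted. A self-contained sketch of that pattern follows; the property names and the resolveConf helper are illustrative, not CDAP or Spark APIs:

import java.util.HashMap;
import java.util.Map;

public class ConfOverrideSketch {

    // applies user properties over defaults, clamping "executors" to a minimum
    static Map<String, String> resolveConf(Map<String, String> defaults,
                                           Map<String, String> userProps,
                                           int minExecutors) {
        Map<String, String> conf = new HashMap<>(defaults);
        for (Map.Entry<String, String> property : userProps.entrySet()) {
            if ("executors".equals(property.getKey())) {
                // never let a user-supplied value drop below the computed minimum
                int requested = Integer.parseInt(property.getValue());
                conf.put("executors", String.valueOf(Math.max(requested, minExecutors)));
            } else {
                conf.put(property.getKey(), property.getValue());
            }
        }
        return conf;
    }

    public static void main(String[] args) {
        Map<String, String> defaults = Map.of("executors", "4", "timeout", "600s");
        Map<String, String> user = Map.of("executors", "1", "timeout", "300s");
        // resolves to executors=2 (clamped up to the minimum) and timeout=300s
        System.out.println(resolveConf(defaults, user, 2));
    }
}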

Example 10 with TransactionPolicy

Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.

From the class Transactions, the method getTransactionControl:

private static TransactionControl getTransactionControl(Class<?> cls, String methodName, Class<?>[] params) {
    try {
        Method method = cls.getDeclaredMethod(methodName, params);
        TransactionPolicy annotation = method.getAnnotation(TransactionPolicy.class);
        if (annotation != null) {
            return annotation.value();
        }
    } catch (NoSuchMethodException e) {
        // this class does not have the method, that is ok
    }
    return null;
}
Also used: TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy), Method (java.lang.reflect.Method)
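Example 10 shows the consumer side of the annotation: plain reflection over the declared method. The following self-contained sketch reproduces the lookup with local stand-ins for the CDAP types (the nested TransactionControl and TransactionPolicy here are illustrative substitutes, not the io.cdap.cdap classes), so it compiles and runs on its own:

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.lang.reflect.Method;

public class TransactionControlLookup {

    // local stand-ins for the CDAP types so the sketch is self-contained
    enum TransactionControl { IMPLICIT, EXPLICIT }

    @Retention(RetentionPolicy.RUNTIME)
    @Target(ElementType.METHOD)
    @interface TransactionPolicy {
        TransactionControl value();
    }

    static class MyProgram {
        @TransactionPolicy(TransactionControl.EXPLICIT)
        public void initialize() {
        }
    }

    // mirrors getTransactionControl above: read the annotation off the declared
    // method, returning null when the method or the annotation is absent
    static TransactionControl getTransactionControl(Class<?> cls, String methodName, Class<?>... params) {
        try {
            Method method = cls.getDeclaredMethod(methodName, params);
            TransactionPolicy annotation = method.getAnnotation(TransactionPolicy.class);
            if (annotation != null) {
                return annotation.value();
            }
        } catch (NoSuchMethodException e) {
            // this class does not declare the method, that is ok
        }
        return null;
    }

    public static void main(String[] args) {
        TransactionControl control = getTransactionControl(MyProgram.class, "initialize");
        // prints EXPLICIT; a caller would typically fall back to IMPLICIT when null
        System.out.println(control != null ? control : TransactionControl.IMPLICIT);
    }
}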

Aggregations

TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy) 26
Path (javax.ws.rs.Path) 16
Metrics (io.cdap.cdap.api.metrics.Metrics) 12
Connection (io.cdap.cdap.etl.proto.connection.Connection) 12
ConnectionId (io.cdap.cdap.etl.proto.connection.ConnectionId) 12
HttpURLConnection (java.net.HttpURLConnection) 12
ConnectionEntityId (io.cdap.cdap.proto.id.ConnectionEntityId) 10
POST (javax.ws.rs.POST) 8
HashMap (java.util.HashMap) 6
Map (java.util.Map) 6
MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator) 4
SparkClientContext (io.cdap.cdap.api.spark.SparkClientContext) 4
BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec) 4
DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator) 4
PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime) 4
CompositeFinisher (io.cdap.cdap.etl.common.submit.CompositeFinisher) 4
Finisher (io.cdap.cdap.etl.common.submit.Finisher) 4
ConnectionCreationRequest (io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest) 4
StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec) 4
GET (javax.ws.rs.GET) 4