
Example 11 with TransactionPolicy

use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.

the class ETLSpark method initialize.

@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
    SparkClientContext context = getContext();
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.speculation", "false");
    // turn off auto-broadcast by default until we better understand the implications and can set this to a
    // value that we are confident is safe.
    sparkConf.set("spark.sql.autoBroadcastJoinThreshold", "-1");
    sparkConf.set("spark.maxRemoteBlockSizeFetchToMem", String.valueOf(Integer.MAX_VALUE - 512));
    sparkConf.set("spark.network.timeout", "600s");
    // Disable yarn app retries since spark already performs retries at a task level.
    sparkConf.set("spark.yarn.maxAppAttempts", "1");
    // to make sure fields that are the same but different casing are treated as different fields in auto-joins
    // see CDAP-17024
    sparkConf.set("spark.sql.caseSensitive", "true");
    context.setSparkConf(sparkConf);
    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
        sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
    }
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context);
    MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace());
    SparkPreparer preparer = new SparkPreparer(context, context.getMetrics(), evaluator, pipelineRuntime);
    List<Finisher> finishers = preparer.prepare(phaseSpec);
    finisher = new CompositeFinisher(finishers);
}
Also used : PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) SparkClientContext(io.cdap.cdap.api.spark.SparkClientContext) CompositeFinisher(io.cdap.cdap.etl.common.submit.CompositeFinisher) Finisher(io.cdap.cdap.etl.common.submit.Finisher) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) SparkConf(org.apache.spark.SparkConf) HashMap(java.util.HashMap) Map(java.util.Map) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
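The example above attaches the policy to a program lifecycle method. A minimal sketch of the same pattern in a user-written Spark program is shown below; it assumes a class extending AbstractSpark, and the class name, description, and Spark settings are illustrative rather than taken from the example.

import io.cdap.cdap.api.annotation.TransactionControl;
import io.cdap.cdap.api.annotation.TransactionPolicy;
import io.cdap.cdap.api.spark.AbstractSpark;
import io.cdap.cdap.api.spark.SparkClientContext;
import org.apache.spark.SparkConf;

public class MySparkProgram extends AbstractSpark {

    @Override
    protected void configure() {
        setName("MySparkProgram");
        setDescription("Illustrative Spark program using explicit transaction control");
    }

    // EXPLICIT opts initialize() out of the implicit transaction CDAP would otherwise
    // start, so long-running Spark setup is not bound by the transaction timeout.
    @Override
    @TransactionPolicy(TransactionControl.EXPLICIT)
    public void initialize() throws Exception {
        SparkClientContext context = getContext();
        SparkConf sparkConf = new SparkConf();
        // illustrative setting; mirror whatever Spark configuration the program needs
        sparkConf.set("spark.speculation", "false");
        context.setSparkConf(sparkConf);
    }
}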

Example 12 with TransactionPolicy

use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.

the class ETLMapReduce method initialize.

@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
    MapReduceContext context = getContext();
    Map<String, String> properties = context.getSpecification().getProperties();
    if (Boolean.valueOf(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context, mrMetrics);
    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
        hConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
    }
    // should never happen if planner is correct
    Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
    if (reducers.size() > 1) {
        Iterator<StageSpec> reducerIter = reducers.iterator();
        StringBuilder reducersStr = new StringBuilder(reducerIter.next().getName());
        while (reducerIter.hasNext()) {
            reducersStr.append(",");
            reducersStr.append(reducerIter.next().getName());
        }
        throw new IllegalStateException("Found multiple reducers ( " + reducersStr + " ) in the same pipeline phase. " + "This means there was a bug in planning the pipeline when it was deployed. ");
    }
    job.setMapperClass(ETLMapper.class);
    if (reducers.isEmpty()) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(ETLReducer.class);
    }
    // instantiate plugins and call their prepare methods
    Set<String> connectorDatasets = GSON.fromJson(properties.get(Constants.CONNECTOR_DATASETS), CONNECTOR_DATASETS_TYPE);
    MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace());
    MapReducePreparer preparer = new MapReducePreparer(context, mrMetrics, evaluator, pipelineRuntime, connectorDatasets);
    List<Finisher> finishers = preparer.prepare(phaseSpec, job);
    finisher = new CompositeFinisher(finishers);
}
Also used : PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) Configuration(org.apache.hadoop.conf.Configuration) CompositeFinisher(io.cdap.cdap.etl.common.submit.CompositeFinisher) MapReduceContext(io.cdap.cdap.api.mapreduce.MapReduceContext) Finisher(io.cdap.cdap.etl.common.submit.Finisher) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) Job(org.apache.hadoop.mapreduce.Job) HashMap(java.util.HashMap) Map(java.util.Map) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
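The reducer-name concatenation in the example above can be written more compactly with a stream collector. A small equivalent sketch, relying only on the StageSpec.getName() accessor already used in the example:

// build the comma-separated list of reducer stage names without the manual Iterator loop
String reducersStr = reducers.stream()
        .map(StageSpec::getName)
        .collect(java.util.stream.Collectors.joining(","));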

Example 13 with TransactionPolicy

use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.

the class ConnectionHandler method deleteConnection.

/**
 * Delete a connection in the given namespace
 */
@DELETE
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}")
public void deleteConnection(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("context") String namespace, @PathParam("connection") String connection) {
    respond(namespace, responder, namespaceSummary -> {
        if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Deleting connection in system namespace is currently not supported");
            return;
        }
        ConnectionId connectionId = new ConnectionId(namespaceSummary, connection);
        contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, connectionId.getConnectionId()), StandardPermission.DELETE);
        Connection oldConnection = store.getConnection(connectionId);
        store.deleteConnection(connectionId);
        Metrics child = metrics.child(ImmutableMap.of(Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME, Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, oldConnection.getConnectionType()));
        child.count(Constants.Metrics.Connection.CONNECTION_DELETED_COUNT, 1);
        responder.sendStatus(HttpURLConnection.HTTP_OK);
    });
}
Also used : ConnectionId(io.cdap.cdap.etl.proto.connection.ConnectionId) Metrics(io.cdap.cdap.api.metrics.Metrics) HttpURLConnection(java.net.HttpURLConnection) Connection(io.cdap.cdap.etl.proto.connection.Connection) ConnectionEntityId(io.cdap.cdap.proto.id.ConnectionEntityId) Path(javax.ws.rs.Path) DELETE(javax.ws.rs.DELETE) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
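The ConnectionHandler methods share one shape: a JAX-RS verb annotation, an explicit transaction policy, and a path template on a handler method. A minimal sketch of that shape follows, assuming a class that extends AbstractHttpServiceHandler; the class name, path, and endpoint semantics are hypothetical, and only the annotations and request/responder types come from the example above.

import io.cdap.cdap.api.annotation.TransactionControl;
import io.cdap.cdap.api.annotation.TransactionPolicy;
import io.cdap.cdap.api.service.http.AbstractHttpServiceHandler;
import io.cdap.cdap.api.service.http.HttpServiceRequest;
import io.cdap.cdap.api.service.http.HttpServiceResponder;
import java.net.HttpURLConnection;
import javax.ws.rs.DELETE;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;

public class MyResourceHandler extends AbstractHttpServiceHandler {

    // EXPLICIT keeps CDAP from opening an implicit transaction around the call,
    // which matters when the handler does long-running or remote work.
    @DELETE
    @TransactionPolicy(value = TransactionControl.EXPLICIT)
    @Path("v1/contexts/{context}/resources/{resource}")
    public void deleteResource(HttpServiceRequest request, HttpServiceResponder responder,
                               @PathParam("context") String namespace,
                               @PathParam("resource") String resource) {
        // a real handler would delete the resource here, outside any implicit transaction
        responder.sendStatus(HttpURLConnection.HTTP_OK);
    }
}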

Example 14 with TransactionPolicy

use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.

the class ConnectionHandler method sample.

/**
 * Retrieve a sample result for the connection
 */
@POST
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}/sample")
public void sample(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("context") String namespace, @PathParam("connection") String connection) {
    respond(namespace, responder, namespaceSummary -> {
        if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Sampling connection in system namespace is currently not supported");
            return;
        }
        contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, ConnectionId.getConnectionId(connection)), StandardPermission.USE);
        String sampleRequestString = StandardCharsets.UTF_8.decode(request.getContent()).toString();
        SampleRequest sampleRequest = GSON.fromJson(sampleRequestString, SampleRequest.class);
        if (sampleRequest == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "The request body is empty");
            return;
        }
        if (sampleRequest.getPath() == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Path is not provided in the sample request");
            return;
        }
        if (sampleRequest.getLimit() <= 0) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Limit should be greater than 0");
            return;
        }
        Connection conn = store.getConnection(new ConnectionId(namespaceSummary, connection));
        if (getContext().isRemoteTaskEnabled()) {
            executeRemotely(namespace, sampleRequestString, conn, RemoteConnectionSampleTask.class, responder);
        } else {
            sampleLocally(namespaceSummary.getName(), sampleRequestString, conn, responder);
        }
        Metrics child = metrics.child(ImmutableMap.of(Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME, Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, conn.getConnectionType()));
        child.count(Constants.Metrics.Connection.CONNECTION_SAMPLE_COUNT, 1);
        // sample will also generate the spec, so add the metric for it
        child.count(Constants.Metrics.Connection.CONNECTION_SPEC_COUNT, 1);
    });
}
Also used : ConnectionId(io.cdap.cdap.etl.proto.connection.ConnectionId) Metrics(io.cdap.cdap.api.metrics.Metrics) HttpURLConnection(java.net.HttpURLConnection) Connection(io.cdap.cdap.etl.proto.connection.Connection) SampleRequest(io.cdap.cdap.etl.api.connector.SampleRequest) ConnectionEntityId(io.cdap.cdap.proto.id.ConnectionEntityId) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
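The sample handler rejects bodies that are empty, missing a path, or carrying a non-positive limit. A hypothetical request body that would pass those checks, restricted to the two fields the handler actually validates; the JSON field names are inferred from the getPath() and getLimit() getters, so treat them as assumptions.

// inside the handler's context, where GSON and SampleRequest are already available
String sampleBody = "{\"path\": \"/data/users.csv\", \"limit\": 10}";
SampleRequest parsed = GSON.fromJson(sampleBody, SampleRequest.class);
// with the assumed field names, parsed.getPath() and parsed.getLimit() are populated
// and all three validation checks above pass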

Example 15 with TransactionPolicy

use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.

the class ConnectionHandler method getConnection.

/**
 * Returns the specific connection information in the given namespace
 */
@GET
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}")
public void getConnection(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("context") String namespace, @PathParam("connection") String connection) {
    respond(namespace, responder, namespaceSummary -> {
        if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Getting connection in system namespace is currently not supported");
            return;
        }
        contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, ConnectionId.getConnectionId(connection)), StandardPermission.GET);
        Connection conn = store.getConnection(new ConnectionId(namespaceSummary, connection));
        Metrics child = metrics.child(ImmutableMap.of(Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME, Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, conn.getConnectionType()));
        child.count(Constants.Metrics.Connection.CONNECTION_GET_COUNT, 1);
        responder.sendJson(conn);
    });
}
Also used : ConnectionId(io.cdap.cdap.etl.proto.connection.ConnectionId) Metrics(io.cdap.cdap.api.metrics.Metrics) HttpURLConnection(java.net.HttpURLConnection) Connection(io.cdap.cdap.etl.proto.connection.Connection) ConnectionEntityId(io.cdap.cdap.proto.id.ConnectionEntityId) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
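All of these handler methods run with TransactionControl.EXPLICIT, so nothing they do is wrapped in a transaction automatically; any dataset access has to be wrapped explicitly. A minimal sketch of that pattern, assuming the handler's context implements io.cdap.cdap.api.Transactional (which provides execute(TxRunnable)); the dataset name is hypothetical.

// inside a handler method declared with @TransactionPolicy(TransactionControl.EXPLICIT);
// execute(...) may throw a checked TransactionFailureException, so a real handler
// would catch it or propagate it
getContext().execute(datasetContext -> {
    // dataset reads and writes placed here run in a short, explicitly started
    // transaction, e.g. datasetContext.getDataset("myTable") to obtain a dataset
});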

Aggregations

TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy): 26
Path (javax.ws.rs.Path): 16
Metrics (io.cdap.cdap.api.metrics.Metrics): 12
Connection (io.cdap.cdap.etl.proto.connection.Connection): 12
ConnectionId (io.cdap.cdap.etl.proto.connection.ConnectionId): 12
HttpURLConnection (java.net.HttpURLConnection): 12
ConnectionEntityId (io.cdap.cdap.proto.id.ConnectionEntityId): 10
POST (javax.ws.rs.POST): 8
HashMap (java.util.HashMap): 6
Map (java.util.Map): 6
MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator): 4
SparkClientContext (io.cdap.cdap.api.spark.SparkClientContext): 4
BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec): 4
DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator): 4
PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime): 4
CompositeFinisher (io.cdap.cdap.etl.common.submit.CompositeFinisher): 4
Finisher (io.cdap.cdap.etl.common.submit.Finisher): 4
ConnectionCreationRequest (io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest): 4
StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 4
GET (javax.ws.rs.GET): 4