Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.
In the class ConnectionHandler, the spec method:
/**
 * Retrieve the spec for the connector, which can be used in a source/sink
 */
@POST
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}/specification")
public void spec(HttpServiceRequest request, HttpServiceResponder responder,
                 @PathParam("context") String namespace,
                 @PathParam("connection") String connection) {
  respond(namespace, responder, namespaceSummary -> {
    if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
      responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST,
                          "Generating connector spec in system namespace is currently not supported");
      return;
    }
    contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, ConnectionId.getConnectionId(connection)),
                                  StandardPermission.USE);
    String specGenerationRequestString = StandardCharsets.UTF_8.decode(request.getContent()).toString();
    SpecGenerationRequest specRequest = GSON.fromJson(specGenerationRequestString, SpecGenerationRequest.class);
    if (specRequest == null) {
      responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "The request body is empty");
      return;
    }
    if (specRequest.getPath() == null) {
      responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Path is not provided in the sample request");
      return;
    }
    Connection conn = store.getConnection(new ConnectionId(namespaceSummary, connection));
    if (getContext().isRemoteTaskEnabled()) {
      executeRemotely(namespace, specGenerationRequestString, conn, RemoteConnectionSpecTask.class, responder);
    } else {
      specGenerationLocally(namespaceSummary.getName(), specRequest, conn, responder);
    }
    Metrics child = metrics.child(ImmutableMap.of(Constants.Metrics.Tag.APP_ENTITY_TYPE,
                                                  Constants.CONNECTION_SERVICE_NAME,
                                                  Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME,
                                                  conn.getConnectionType()));
    child.count(Constants.Metrics.Connection.CONNECTION_SPEC_COUNT, 1);
  });
}
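The handler above uses @TransactionPolicy(TransactionControl.EXPLICIT) so the method runs without an implicit transaction and performs its own request validation before responding. Below is a minimal, self-contained sketch of the same pattern; it is not taken from the CDAP codebase, and the EchoHandler class, the /echo path, and the echoed JSON payload are made up for illustration.

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.cdap.cdap.api.annotation.TransactionControl;
import io.cdap.cdap.api.annotation.TransactionPolicy;
import io.cdap.cdap.api.service.http.AbstractHttpServiceHandler;
import io.cdap.cdap.api.service.http.HttpServiceRequest;
import io.cdap.cdap.api.service.http.HttpServiceResponder;
import java.net.HttpURLConnection;
import java.nio.charset.StandardCharsets;
import javax.ws.rs.POST;
import javax.ws.rs.Path;

public class EchoHandler extends AbstractHttpServiceHandler {
  private static final Gson GSON = new Gson();

  @POST
  @TransactionPolicy(TransactionControl.EXPLICIT)
  @Path("/echo")
  public void echo(HttpServiceRequest request, HttpServiceResponder responder) {
    // No transaction is open here; the body is decoded and validated manually.
    String body = StandardCharsets.UTF_8.decode(request.getContent()).toString();
    JsonObject json = GSON.fromJson(body, JsonObject.class);
    if (json == null) {
      responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "The request body is empty");
      return;
    }
    // Echo the parsed payload back to the caller.
    responder.sendJson(json);
  }
}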
Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.
In the class ETLSpark, the initialize method:
@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
  SparkClientContext context = getContext();
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.speculation", "false");
  // turn off auto-broadcast by default until we better understand the implications and can set this to a
  // value that we are confident is safe.
  sparkConf.set("spark.sql.autoBroadcastJoinThreshold", "-1");
  sparkConf.set("spark.maxRemoteBlockSizeFetchToMem", String.valueOf(Integer.MAX_VALUE - 512));
  sparkConf.set("spark.network.timeout", "600s");
  // Disable yarn app retries since spark already performs retries at a task level.
  sparkConf.set("spark.yarn.maxAppAttempts", "1");
  // to make sure fields that are the same but different casing are treated as different fields in auto-joins
  // see CDAP-17024
  sparkConf.set("spark.sql.caseSensitive", "true");
  context.setSparkConf(sparkConf);
  Map<String, String> properties = context.getSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
    sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
  }
  PipelineRuntime pipelineRuntime = new PipelineRuntime(context);
  MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(),
                                                       context, context, context.getNamespace());
  SparkPreparer preparer = new SparkPreparer(context, context.getMetrics(), evaluator, pipelineRuntime);
  List<Finisher> finishers = preparer.prepare(phaseSpec);
  finisher = new CompositeFinisher(finishers);
}
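Both this initialize method and the ETLMapReduce one below end by wrapping the Finisher objects returned by the preparer in a CompositeFinisher. The sketch below shows the composite idea only; it assumes a Finisher contract with a single onFinish(boolean succeeded) callback, which may not match the exact interface in cdap-etl.

import java.util.ArrayList;
import java.util.List;

interface Finisher {
  // Called once when the program run completes, successfully or not.
  void onFinish(boolean succeeded);
}

class CompositeFinisher implements Finisher {
  private final List<Finisher> finishers;

  CompositeFinisher(List<Finisher> finishers) {
    this.finishers = new ArrayList<>(finishers);
  }

  @Override
  public void onFinish(boolean succeeded) {
    // Invoke every delegate so each prepared stage can clean up its resources.
    for (Finisher finisher : finishers) {
      finisher.onFinish(succeeded);
    }
  }
}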
Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.
In the class ETLMapReduce, the initialize method:
@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Map<String, String> properties = context.getSpecification().getProperties();
  if (Boolean.valueOf(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
    LogStageInjector.start();
  }
  PipelineRuntime pipelineRuntime = new PipelineRuntime(context, mrMetrics);
  Job job = context.getHadoopJob();
  Configuration hConf = job.getConfiguration();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
    hConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
  }
  // should never happen if planner is correct
  Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
  if (reducers.size() > 1) {
    Iterator<StageSpec> reducerIter = reducers.iterator();
    StringBuilder reducersStr = new StringBuilder(reducerIter.next().getName());
    while (reducerIter.hasNext()) {
      reducersStr.append(",");
      reducersStr.append(reducerIter.next().getName());
    }
    throw new IllegalStateException("Found multiple reducers ( " + reducersStr + " ) in the same pipeline phase. "
                                      + "This means there was a bug in planning the pipeline when it was deployed. ");
  }
  job.setMapperClass(ETLMapper.class);
  if (reducers.isEmpty()) {
    job.setNumReduceTasks(0);
  } else {
    job.setReducerClass(ETLReducer.class);
  }
  // instantiate plugins and call their prepare methods
  Set<String> connectorDatasets = GSON.fromJson(properties.get(Constants.CONNECTOR_DATASETS), CONNECTOR_DATASETS_TYPE);
  MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(),
                                                       context, context, context.getNamespace());
  MapReducePreparer preparer = new MapReducePreparer(context, mrMetrics, evaluator, pipelineRuntime, connectorDatasets);
  List<Finisher> finishers = preparer.prepare(phaseSpec, job);
  finisher = new CompositeFinisher(finishers);
}
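CONNECTOR_DATASETS_TYPE is not defined in the snippet; it is a Gson type token describing the Set of connector dataset names stored under Constants.CONNECTOR_DATASETS. A minimal, self-contained sketch of that deserialization follows, with illustrative class and method names:

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import java.util.Set;

class ConnectorDatasetsExample {
  private static final Gson GSON = new Gson();
  // Captures the generic Set<String> type that is otherwise erased at runtime.
  private static final Type CONNECTOR_DATASETS_TYPE = new TypeToken<Set<String>>() { }.getType();

  static Set<String> parse(String json) {
    // e.g. parse("[\"connector1\",\"connector2\"]") returns a two-element set
    return GSON.fromJson(json, CONNECTOR_DATASETS_TYPE);
  }
}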
Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by caskdata.
In the class DataStreamsSparkLauncher, the initialize method:
@TransactionPolicy(TransactionControl.EXPLICIT)
@Override
public void initialize() throws Exception {
  SparkClientContext context = getContext();
  String arguments = Joiner.on(", ").withKeyValueSeparator("=").join(context.getRuntimeArguments());
  WRAPPERLOGGER.info("Pipeline '{}' is started by user '{}' with arguments {}",
                     context.getApplicationSpecification().getName(),
                     UserGroupInformation.getCurrentUser().getShortUserName(), arguments);
  DataStreamsPipelineSpec spec = GSON.fromJson(context.getSpecification().getProperty(Constants.PIPELINEID),
                                               DataStreamsPipelineSpec.class);
  PipelinePluginContext pluginContext = new SparkPipelinePluginContext(context, context.getMetrics(), true, true);
  int numSources = 0;
  for (StageSpec stageSpec : spec.getStages()) {
    if (StreamingSource.PLUGIN_TYPE.equals(stageSpec.getPlugin().getType())) {
      StreamingSource<Object> streamingSource = pluginContext.newPluginInstance(stageSpec.getName());
      numSources = numSources + streamingSource.getRequiredExecutors();
    }
  }
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.streaming.backpressure.enabled", "true");
  sparkConf.set("spark.spark.streaming.blockInterval", String.valueOf(spec.getBatchIntervalMillis() / 5));
  sparkConf.set("spark.maxRemoteBlockSizeFetchToMem", String.valueOf(Integer.MAX_VALUE - 512));
  // spark... makes you set this to at least the number of receivers (streaming sources)
  // because it holds one thread per receiver, or one core in distributed mode.
  // so... we have to set this hacky master variable based on the isUnitTest setting in the config
  String extraOpts = spec.getExtraJavaOpts();
  if (extraOpts != null && !extraOpts.isEmpty()) {
    sparkConf.set("spark.driver.extraJavaOptions", extraOpts);
    sparkConf.set("spark.executor.extraJavaOptions", extraOpts);
  }
  // without this, stopping will hang on machines with few cores.
  sparkConf.set("spark.rpc.netty.dispatcher.numThreads", String.valueOf(numSources + 2));
  sparkConf.setMaster(String.format("local[%d]", numSources + 2));
  sparkConf.set("spark.executor.instances", String.valueOf(numSources + 2));
  if (spec.isUnitTest()) {
    sparkConf.setMaster(String.format("local[%d]", numSources + 1));
  }
  // override defaults with any user provided engine configs
  int minExecutors = numSources + 1;
  for (Map.Entry<String, String> property : spec.getProperties().entrySet()) {
    if ("spark.executor.instances".equals(property.getKey())) {
      // don't let the user set this to something that doesn't make sense
      try {
        int numExecutors = Integer.parseInt(property.getValue());
        if (numExecutors < minExecutors) {
          LOG.warn("Number of executors {} is less than the minimum number required to run the pipeline. "
                     + "Automatically increasing it to {}", numExecutors, minExecutors);
          numExecutors = minExecutors;
        }
        sparkConf.set(property.getKey(), String.valueOf(numExecutors));
      } catch (NumberFormatException e) {
        throw new IllegalArgumentException("Number of spark executors was set to invalid value " + property.getValue(), e);
      }
    } else {
      sparkConf.set(property.getKey(), property.getValue());
    }
  }
  context.setSparkConf(sparkConf);
  WRAPPERLOGGER.info("Pipeline '{}' running", context.getApplicationSpecification().getName());
}
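The spark.executor.instances branch above is effectively a clamp: parse the user-supplied value, reject non-numeric input, and never drop below numSources + 1 (one core per receiver plus at least one executor for processing). A standalone sketch of that logic follows; the class and method names are illustrative, not CDAP API.

class ExecutorCountExample {
  /**
   * Returns the requested executor count, raised to the minimum when the user asks for too few.
   * Mirrors the validation in DataStreamsSparkLauncher.initialize above.
   */
  static int clamp(String userValue, int minExecutors) {
    int numExecutors;
    try {
      numExecutors = Integer.parseInt(userValue);
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException("Number of spark executors was set to invalid value " + userValue, e);
    }
    return Math.max(numExecutors, minExecutors);
  }
}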
Use of io.cdap.cdap.api.annotation.TransactionPolicy in project cdap by cdapio.
In the class Transactions, the getTransactionControl method:
private static TransactionControl getTransactionControl(Class<?> cls, String methodName, Class<?>[] params) {
  try {
    Method method = cls.getDeclaredMethod(methodName, params);
    TransactionPolicy annotation = method.getAnnotation(TransactionPolicy.class);
    if (annotation != null) {
      return annotation.value();
    }
  } catch (NoSuchMethodException e) {
    // this class does not have the method, that is ok
  }
  return null;
}
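Because @TransactionPolicy is read reflectively at runtime, a caller can resolve the declared transaction control of a lifecycle method the same way getTransactionControl does. Below is a minimal, self-contained sketch: the resolve helper and the MyProgram class are illustrative, and falling back to IMPLICIT when no annotation is found is an assumption for this example, not necessarily the CDAP default logic.

import io.cdap.cdap.api.annotation.TransactionControl;
import io.cdap.cdap.api.annotation.TransactionPolicy;
import java.lang.reflect.Method;

class TransactionControlExample {
  static class MyProgram {
    @TransactionPolicy(TransactionControl.EXPLICIT)
    public void initialize() throws Exception {
      // runs outside a transaction; the program would start its own when needed
    }
  }

  static TransactionControl resolve(Class<?> cls, String methodName, Class<?>... params) {
    try {
      Method method = cls.getDeclaredMethod(methodName, params);
      TransactionPolicy annotation = method.getAnnotation(TransactionPolicy.class);
      if (annotation != null) {
        return annotation.value();
      }
    } catch (NoSuchMethodException e) {
      // the class does not declare the method; fall through to the default
    }
    return TransactionControl.IMPLICIT;
  }

  public static void main(String[] args) {
    // prints EXPLICIT for the annotated initialize() method
    System.out.println(resolve(MyProgram.class, "initialize"));
  }
}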