Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
The class ApplicationRegistrationStage, method registerDatasets.
// Register dataset usage based upon the program specifications.
// Note that datasets declared by worker specifications are not registered at app deploy time, because the
// WorkerConfigurer's useDataset is deprecated: workers are meant to access datasets completely dynamically,
// and other program types are moving in the same direction.
// SparkSpecifications are treated the same way, since a Spark program's dataset access is completely dynamic.
private void registerDatasets(ApplicationWithPrograms input) {
  ApplicationSpecification appSpec = input.getSpecification();
  ApplicationId appId = input.getApplicationId();
  NamespaceId namespaceId = appId.getParent();

  for (MapReduceSpecification program : appSpec.getMapReduce().values()) {
    ProgramId programId = appId.mr(program.getName());
    for (String dataset : program.getDataSets()) {
      usageRegistry.register(programId, namespaceId.dataset(dataset));
    }
  }

  for (SparkSpecification sparkSpec : appSpec.getSpark().values()) {
    ProgramId programId = appId.spark(sparkSpec.getName());
    for (String dataset : sparkSpec.getDatasets()) {
      usageRegistry.register(programId, namespaceId.dataset(dataset));
    }
  }

  for (ServiceSpecification serviceSpecification : appSpec.getServices().values()) {
    ProgramId programId = appId.service(serviceSpecification.getName());
    for (HttpServiceHandlerSpecification handlerSpecification : serviceSpecification.getHandlers().values()) {
      for (String dataset : handlerSpecification.getDatasets()) {
        usageRegistry.register(programId, namespaceId.dataset(dataset));
      }
    }
  }
}
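For orientation, the sketch below builds the same ProgramId/DatasetId pair that one iteration of the Spark loop above passes to the usage registry. The namespace, application, program, and dataset names are hypothetical, and the entity ID classes (NamespaceId, ApplicationId, ProgramId, DatasetId) are assumed to come from io.cdap.cdap.proto.id; this is an illustrative sketch, not code from the deployment pipeline.

// Hypothetical names, for illustration only.
ApplicationId appId = new NamespaceId("default").app("PurchaseApp");
ProgramId sparkProgram = appId.spark("PurchaseSpark");                // the "user" of the dataset
DatasetId purchases = appId.getParent().dataset("purchaseHistory");   // the dataset being accessed

// Equivalent to one iteration of the SparkSpecification loop above.
usageRegistry.register(sparkProgram, purchases);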
Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
The class ApplicationSpecificationCodec, method deserialize.
@Override
public ApplicationSpecification deserialize(JsonElement json, Type typeOfT,
                                            JsonDeserializationContext context) throws JsonParseException {
  JsonObject jsonObj = json.getAsJsonObject();
  String name = jsonObj.get("name").getAsString();
  String appVersion = ApplicationId.DEFAULT_VERSION;
  if (jsonObj.has("appVersion")) {
    appVersion = jsonObj.get("appVersion").getAsString();
  }
  String appCDAPVersion = jsonObj.has("appCDAPVersion") ? jsonObj.get("appCDAPVersion").getAsString() : null;
  String description = jsonObj.get("description").getAsString();
  String configuration = null;
  if (jsonObj.has("configuration")) {
    configuration = jsonObj.get("configuration").getAsString();
  }
  ArtifactId artifactId = context.deserialize(jsonObj.get("artifactId"), ArtifactId.class);
  Map<String, String> datasetModules = deserializeMap(jsonObj.get("datasetModules"), context, String.class);
  Map<String, DatasetCreationSpec> datasetInstances =
    deserializeMap(jsonObj.get("datasetInstances"), context, DatasetCreationSpec.class);
  Map<String, MapReduceSpecification> mapReduces =
    deserializeMap(jsonObj.get("mapReduces"), context, MapReduceSpecification.class);
  Map<String, SparkSpecification> sparks = deserializeMap(jsonObj.get("sparks"), context, SparkSpecification.class);
  Map<String, WorkflowSpecification> workflows =
    deserializeMap(jsonObj.get("workflows"), context, WorkflowSpecification.class);
  Map<String, ServiceSpecification> services =
    deserializeMap(jsonObj.get("services"), context, ServiceSpecification.class);
  Map<String, ScheduleCreationSpec> programSchedules =
    deserializeMap(jsonObj.get("programSchedules"), context, ScheduleCreationSpec.class);
  Map<String, WorkerSpecification> workers =
    deserializeMap(jsonObj.get("workers"), context, WorkerSpecification.class);
  Map<String, Plugin> plugins = deserializeMap(jsonObj.get("plugins"), context, Plugin.class);

  return new DefaultApplicationSpecification(name, appVersion, appCDAPVersion, description, configuration,
                                             artifactId, datasetModules, datasetInstances, mapReduces, sparks,
                                             workflows, services, programSchedules, workers, plugins);
}
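The codec only takes effect once it is registered with Gson. The wiring below is a minimal sketch: it assumes the codec has a no-argument constructor and registers only this one adapter, whereas the real CDAP serialization setup also registers adapters for the nested specification types.

// Minimal sketch, assuming a no-arg ApplicationSpecificationCodec constructor; the real CDAP
// Gson builder registers adapters for the nested specification types as well.
Gson gson = new GsonBuilder()
  .registerTypeAdapter(ApplicationSpecification.class, new ApplicationSpecificationCodec())
  .create();

ApplicationSpecification appSpec = gson.fromJson(appSpecJson, ApplicationSpecification.class);
SparkSpecification sparkSpec = appSpec.getSpark().get("PurchaseSpark"); // hypothetical program name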
Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
The class SparkRuntimeContext, method getSparkSpecification.
private static SparkSpecification getSparkSpecification(Program program) {
  SparkSpecification spec = program.getApplicationSpecification().getSpark().get(program.getName());
  // The spec shouldn't be null; otherwise the Spark program wouldn't even have been started.
  Preconditions.checkState(spec != null, "SparkSpecification not found for %s", program.getId());
  return spec;
}
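The same look-up-and-fail-fast pattern works for any specification map keyed by program name. The helper below is an illustrative generalization and is not part of CDAP; it uses the same Guava Preconditions call as above.

// Illustrative helper, not part of CDAP: look up a named specification and fail fast if it is absent.
private static <T> T getSpecOrFail(Map<String, T> specs, String programName, ProgramId programId) {
  T spec = specs.get(programName);
  Preconditions.checkState(spec != null, "Specification not found for %s", programId);
  return spec;
}

// Usage, mirroring getSparkSpecification above:
SparkSpecification spec =
  getSpecOrFail(program.getApplicationSpecification().getSpark(), program.getName(), program.getId());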
Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
The class DistributedSparkProgramRunner, method validateOptions.
@Override
protected void validateOptions(Program program, ProgramOptions options) {
  super.validateOptions(program, options);

  // Extract and verify parameters
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification for %s", program.getId());

  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type for %s", program.getId());
  Preconditions.checkArgument(processorType == ProgramType.SPARK,
                              "Only SPARK process type is supported. Program type is %s for %s",
                              processorType, program.getId());

  SparkSpecification spec = appSpec.getSpark().get(program.getName());
  Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getId());
}
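These checks fail fast before anything is launched. The snippet below is a small illustration of the Guava Preconditions semantics with hypothetical values; it is not CDAP code.

// Hypothetical values, shown only to illustrate the failure mode.
ProgramType processorType = ProgramType.WORKFLOW;
Preconditions.checkArgument(processorType == ProgramType.SPARK,
                            "Only SPARK process type is supported. Program type is %s for %s",
                            processorType, "program:default.PurchaseApp.workflow.PurchaseWorkflow");
// Throws IllegalArgumentException with both %s placeholders filled in; checkNotNull behaves the same
// way but throws NullPointerException instead.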
Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
The class DistributedSparkProgramRunner, method setupLaunchConfig.
@Override
protected void setupLaunchConfig(ProgramLaunchConfig launchConfig, Program program, ProgramOptions options,
                                 CConfiguration cConf, Configuration hConf, File tempDir) throws IOException {
  // Update the container hConf
  if (clusterMode == ClusterMode.ON_PREMISE) {
    // Kerberos is only supported in on-premise mode
    hConf.set(Constants.Explore.HIVE_METASTORE_TOKEN_SIG, Constants.Explore.HIVE_METASTORE_TOKEN_SERVICE_NAME);
    if (SecurityUtil.isKerberosEnabled(cConf)) {
      // Divide the interval by 0.8 because Spark applies a 0.8 discount to the configured interval; without
      // this offset it would look for new credentials too soon. Also add 5 seconds to the interval to give
      // the master time to push the changes to the Spark client container.
      long interval = (long) ((TokenSecureStoreRenewer.calculateUpdateInterval(cConf, hConf) + 5000) / 0.8);
      launchConfig.addExtraSystemArgument(SparkRuntimeContextConfig.CREDENTIALS_UPDATE_INTERVAL_MS,
                                          Long.toString(interval));
    }
  }

  // Setup the launch config
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  SparkSpecification spec = appSpec.getSpark().get(program.getName());
  Map<String, String> clientArgs = RuntimeArguments.extractScope("task", "client",
                                                                 options.getUserArguments().asMap());

  // Add runnable. Only one instance for the Spark client.
  launchConfig.addRunnable(spec.getName(), new SparkTwillRunnable(spec.getName()), 1,
                           clientArgs, spec.getClientResources(), 0);

  Map<String, String> extraEnv = new HashMap<>();
  extraEnv.put(Constants.SPARK_COMPAT_ENV, sparkCompat.getCompat());
  if (sparkCompat.getCompat().equals(SparkCompat.SPARK2_2_11.getCompat())) {
    // No need to rewrite the YARN client
    cConf.setBoolean(Constants.AppFabric.SPARK_YARN_CLIENT_REWRITE, false);
  }

  // Add extra resources, classpath, dependencies, env and set up the ClassAcceptor
  if (clusterMode == ClusterMode.ON_PREMISE || cConf.getBoolean(Constants.AppFabric.PROGRAM_REMOTE_RUNNER, false)) {
    Map<String, LocalizeResource> localizeResources = new HashMap<>();
    SparkPackageUtils.prepareSparkResources(sparkCompat, locationFactory, tempDir, localizeResources, extraEnv);

    // Add the MapReduce resources and classpath as well, for the InputFormat/OutputFormat classes
    MapReduceContainerHelper.localizeFramework(hConf, localizeResources);
    launchConfig.addExtraResources(localizeResources)
      .addExtraClasspath(MapReduceContainerHelper.addMapReduceClassPath(hConf, new ArrayList<String>()));
  }

  launchConfig.addExtraEnv(extraEnv)
    .addExtraDependencies(SparkProgramRuntimeProvider.class)
    .addExtraSystemArgument(SparkRuntimeContextConfig.DISTRIBUTED_MODE, Boolean.TRUE.toString())
    .setClassAcceptor(createBundlerClassAcceptor());
}
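The credentials update interval arithmetic above is easier to follow with concrete numbers. The renewal interval used below is a hypothetical value, not one computed by TokenSecureStoreRenewer.

// Hypothetical example: suppose calculateUpdateInterval returns one hour.
long renewal = 3_600_000L;                         // 1 hour in ms (assumed value)
long interval = (long) ((renewal + 5000) / 0.8);   // 4,506,250 ms passed to the Spark client container
// Spark applies its 0.8 discount to the configured interval before acting on it:
long effective = (long) (interval * 0.8);          // 3,605,000 ms = the original renewal interval + 5 s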