use of co.cask.cdap.api.Admin in project cdap by caskdata.
the class AdminApp method performAdmin.
// this will get called from the worker, also from a custom workflow action
static void performAdmin(RuntimeContext context) {
Admin admin = context.getAdmin();
Map<String, String> args = context.getRuntimeArguments();
try {
// if invoked with dropAll=true, clean up all datasets (a, b, c, d)
if ("true".equals(args.get("dropAll"))) {
for (String name : new String[] { "a", "b", "c", "d" }) {
if (admin.datasetExists(name)) {
admin.dropDataset(name);
}
}
} else {
// create a, update b with /extra in base path, truncate c, drop d
admin.createDataset("a", Table.class.getName(), DatasetProperties.EMPTY);
String type = admin.getDatasetType("b");
Assert.assertEquals(FileSet.class.getName(), type);
DatasetProperties bProps = admin.getDatasetProperties("b");
String base = bProps.getProperties().get("base.path");
Assert.assertNotNull(base);
String newBase = args.get("new.base.path");
DatasetProperties newBProps = ((FileSetProperties.Builder) FileSetProperties.builder().addAll(bProps.getProperties())).setDataExternal(true).setBasePath(newBase).build();
admin.updateDataset("b", newBProps);
admin.truncateDataset("c");
admin.dropDataset("d");
}
} catch (DatasetManagementException e) {
Throwables.propagate(e);
}
}
use of co.cask.cdap.api.Admin in project cdap by caskdata.
the class ETLSpark method initialize.
@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
final SparkClientContext context = getContext();
cleanupFiles = new ArrayList<>();
List<Finisher> finishers = new ArrayList<>();
SparkConf sparkConf = new SparkConf();
sparkConf.set("spark.driver.extraJavaOptions", "-XX:MaxPermSize=256m");
sparkConf.set("spark.executor.extraJavaOptions", "-XX:MaxPermSize=256m");
sparkConf.set("spark.speculation", "false");
context.setSparkConf(sparkConf);
Map<String, String> properties = context.getSpecification().getProperties();
BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
}
MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(context), context.getLogicalStartTime(), context, context.getNamespace());
final SparkBatchSourceFactory sourceFactory = new SparkBatchSourceFactory();
final SparkBatchSinkFactory sinkFactory = new SparkBatchSinkFactory();
final Map<String, Integer> stagePartitions = new HashMap<>();
PluginContext pluginContext = new SparkPipelinePluginContext(context, context.getMetrics(), phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(pluginContext, context.getMetrics(), phaseSpec, new SingleConnectorFactory());
final PipelineRuntime pipelineRuntime = new PipelineRuntime(context);
final Admin admin = context.getAdmin();
PipelinePhase phase = phaseSpec.getPhase();
// go through in topological order so that arguments set by one stage are seen by stages after it
for (final String stageName : phase.getDag().getTopologicalOrder()) {
final StageSpec stageSpec = phase.getStage(stageName);
String pluginType = stageSpec.getPluginType();
boolean isConnectorSource = Constants.Connector.PLUGIN_TYPE.equals(pluginType) && phase.getSources().contains(stageName);
boolean isConnectorSink = Constants.Connector.PLUGIN_TYPE.equals(pluginType) && phase.getSinks().contains(stageName);
SubmitterPlugin submitterPlugin = null;
if (BatchSource.PLUGIN_TYPE.equals(pluginType) || isConnectorSource) {
BatchConfigurable<BatchSourceContext> batchSource = pluginInstantiator.newPluginInstance(stageName, evaluator);
ContextProvider<BatchSourceContext> contextProvider = new ContextProvider<BatchSourceContext>() {
@Override
public BatchSourceContext getContext(DatasetContext datasetContext) {
return new SparkBatchSourceContext(sourceFactory, context, pipelineRuntime, datasetContext, stageSpec);
}
};
submitterPlugin = new SubmitterPlugin(stageName, context, batchSource, contextProvider);
} else if (Transform.PLUGIN_TYPE.equals(pluginType)) {
Transform transform = pluginInstantiator.newPluginInstance(stageName, evaluator);
ContextProvider<StageSubmitterContext> contextProvider = new ContextProvider<StageSubmitterContext>() {
@Override
public StageSubmitterContext getContext(DatasetContext datasetContext) {
return new SparkBatchSourceContext(sourceFactory, context, pipelineRuntime, datasetContext, stageSpec);
}
};
submitterPlugin = new SubmitterPlugin(stageName, context, transform, contextProvider);
} else if (BatchSink.PLUGIN_TYPE.equals(pluginType) || isConnectorSink) {
BatchConfigurable<BatchSinkContext> batchSink = pluginInstantiator.newPluginInstance(stageName, evaluator);
ContextProvider<BatchSinkContext> contextProvider = new ContextProvider<BatchSinkContext>() {
@Override
public BatchSinkContext getContext(DatasetContext datasetContext) {
return new SparkBatchSinkContext(sinkFactory, context, pipelineRuntime, datasetContext, stageSpec);
}
};
submitterPlugin = new SubmitterPlugin(stageName, context, batchSink, contextProvider);
} else if (SparkSink.PLUGIN_TYPE.equals(pluginType)) {
BatchConfigurable<SparkPluginContext> sparkSink = pluginInstantiator.newPluginInstance(stageName, evaluator);
ContextProvider<SparkPluginContext> contextProvider = new ContextProvider<SparkPluginContext>() {
@Override
public SparkPluginContext getContext(DatasetContext datasetContext) {
return new BasicSparkPluginContext(context, pipelineRuntime, stageSpec, datasetContext, admin);
}
};
submitterPlugin = new SubmitterPlugin(stageName, context, sparkSink, contextProvider);
} else if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
BatchAggregator aggregator = pluginInstantiator.newPluginInstance(stageName, evaluator);
ContextProvider<DefaultAggregatorContext> contextProvider = new AggregatorContextProvider(pipelineRuntime, stageSpec, admin);
submitterPlugin = new SubmitterPlugin(stageName, context, aggregator, contextProvider);
} else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
BatchJoiner joiner = pluginInstantiator.newPluginInstance(stageName, evaluator);
ContextProvider<DefaultJoinerContext> contextProvider = new JoinerContextProvider(pipelineRuntime, stageSpec, admin);
submitterPlugin = new SubmitterPlugin<>(stageName, context, joiner, contextProvider, new SubmitterPlugin.PrepareAction<DefaultJoinerContext>() {
@Override
public void act(DefaultJoinerContext sparkJoinerContext) {
stagePartitions.put(stageName, sparkJoinerContext.getNumPartitions());
}
});
}
if (submitterPlugin != null) {
submitterPlugin.prepareRun();
finishers.add(submitterPlugin);
}
}
File configFile = File.createTempFile("HydratorSpark", ".config");
cleanupFiles.add(configFile);
try (Writer writer = Files.newBufferedWriter(configFile.toPath(), StandardCharsets.UTF_8)) {
SparkBatchSourceSinkFactoryInfo sourceSinkInfo = new SparkBatchSourceSinkFactoryInfo(sourceFactory, sinkFactory, stagePartitions);
writer.write(GSON.toJson(sourceSinkInfo));
}
finisher = new CompositeFinisher(finishers);
context.localize("HydratorSpark.config", configFile.toURI());
WorkflowToken token = context.getWorkflowToken();
if (token != null) {
for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
token.put(entry.getKey(), entry.getValue());
}
}
}
Aggregations