use of co.cask.cdap.api.data.batch.OutputFormatProvider in project cdap by caskdata.
the class BasicMapReduceContext method addOutput.
/**
 * Registers an output with this context, keyed by the output's alias.
 *
 * @param output the output to register
 * @throws IllegalArgumentException if the output lives in the system namespace while the program does not,
 *                                  if an output with the same alias was already registered, if an
 *                                  OutputFormatProvider-based output is also a DatasetOutputCommitter, or if
 *                                  the output is of an unrecognized class
 */
@Override
public void addOutput(Output output) {
  String targetNamespace = output.getNamespace();
  boolean targetsSystemNamespace =
    targetNamespace != null && targetNamespace.equals(NamespaceId.SYSTEM.getNamespace());
  // only programs that themselves run in the system namespace may write to it
  if (targetsSystemNamespace && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) {
    String message = String.format("Accessing Output %s in system namespace is not allowed from the namespace %s",
                                   output.getName(), getProgram().getNamespaceId());
    throw new IllegalArgumentException(message);
  }

  // each alias may be registered at most once
  String outputAlias = output.getAlias();
  if (this.outputs.containsKey(outputAlias)) {
    throw new IllegalArgumentException("Output already configured: " + outputAlias);
  }

  ProvidedOutput resolved;
  if (output instanceof Output.DatasetOutput) {
    resolved = Outputs.transform((Output.DatasetOutput) output, this);
  } else if (!(output instanceof Output.OutputFormatProviderOutput)) {
    // not expected unless a user supplies their own Output subclass
    String message = String.format("Output %s has unknown output class %s",
                                   output.getName(), output.getClass().getCanonicalName());
    throw new IllegalArgumentException(message);
  } else {
    OutputFormatProvider formatProvider = ((Output.OutputFormatProviderOutput) output).getOutputFormatProvider();
    // A DatasetOutputCommitter must not be registered as a plain OutputFormatProviderOutput: its methods
    // could not be called in MainOutputCommitter that way. It needs to be added as a DatasetOutput instead.
    if (formatProvider instanceof DatasetOutputCommitter) {
      throw new IllegalArgumentException(
        "Cannot add a DatasetOutputCommitter as an OutputFormatProviderOutput. Add the output as a DatasetOutput.");
    }
    resolved = new ProvidedOutput(output, formatProvider);
  }

  this.outputs.put(outputAlias, resolved);
}
use of co.cask.cdap.api.data.batch.OutputFormatProvider in project cdap by caskdata.
the class MapReduceRuntimeService method setOutputsIfNeeded.
/**
 * Applies the output configuration to the given job.
 *
 * <p>A root output format is chosen from the outputs registered on the context, the job's output format is
 * replaced by {@link MultipleOutputsMainOutputWrapper}, and every registered output is added to the job as a
 * named output under its alias.
 *
 * @param job the job being configured
 * @throws ClassNotFoundException if the job's own output format class cannot be resolved
 */
private void setOutputsIfNeeded(Job job) throws ClassNotFoundException {
  List<ProvidedOutput> providedOutputs = context.getOutputs();
  fixOutputPermissions(job, providedOutputs);
  LOG.debug("Using as output for MapReduce Job: {}", providedOutputs);

  OutputFormatProvider rootProvider;
  switch (providedOutputs.size()) {
    case 0:
      // no output was added through our APIs; carry over whatever output format the job already has
      rootProvider = new BasicOutputFormatProvider(job.getOutputFormatClass().getName(),
                                                   Collections.<String, String>emptyMap());
      break;
    case 1:
      // a single output configured through the context becomes the root OutputFormat
      rootProvider = providedOutputs.get(0).getOutputFormatProvider();
      break;
    default:
      // Several outputs were configured through the context. The root output format uses a RecordWriter that
      // does not support writing, so that writing directly on the context is disallowed. Its OutputCommitter
      // is effectively a no-op, as it runs as the RootOutputCommitter in MultipleOutputsCommitter.
      rootProvider = new BasicOutputFormatProvider(UnsupportedOutputFormat.class.getName(),
                                                   Collections.<String, String>emptyMap());
      break;
  }

  String rootFormatClassName = rootProvider.getOutputFormatClassName();
  Map<String, String> rootFormatConfig = rootProvider.getOutputFormatConfiguration();
  MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, rootFormatClassName, rootFormatConfig);
  job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

  // register each configured output as a named output, keyed by its alias
  for (ProvidedOutput provided : providedOutputs) {
    String alias = provided.getOutput().getAlias();
    String formatClassName = provided.getOutputFormatClassName();
    Map<String, String> formatConfig = provided.getOutputFormatConfiguration();
    MultipleOutputs.addNamedOutput(job, alias, formatClassName,
                                   job.getOutputKeyClass(), job.getOutputValueClass(), formatConfig);
  }
}
use of co.cask.cdap.api.data.batch.OutputFormatProvider in project cdap by caskdata.
the class MockExternalSink method prepareRun.
/**
 * Registers a text-file output, written to the configured directory, with the sink context.
 *
 * <p>When no name is configured, the output is created from the alias alone. Otherwise it is created from
 * the name and the configured alias is then applied to it.
 *
 * @param context the sink context that receives the output
 */
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  OutputFormatProvider textOutputProvider =
    new BasicOutputFormatProvider(TextOutputFormat.class.getCanonicalName(),
                                  ImmutableMap.of(TextOutputFormat.OUTDIR, config.dirName));

  if (config.name == null) {
    // no explicit name: the alias is the only identifier passed to Output.of
    context.addOutput(Output.of(config.alias, textOutputProvider));
    return;
  }

  Output namedOutput = Output.of(config.name, textOutputProvider);
  namedOutput.alias(config.alias);
  context.addOutput(namedOutput);
}
use of co.cask.cdap.api.data.batch.OutputFormatProvider in project cdap by caskdata.
the class SparkBatchSinkFactory method writeFromRDD.
/**
 * Writes the given RDD to every output registered for the named sink.
 *
 * <p>For each output name, the RDD is saved through the matching output format provider if one is registered,
 * and saved as a dataset if dataset information is registered under that name.
 *
 * @param rdd the data to write
 * @param sec the Spark execution context used for dataset writes
 * @param sinkName name of the sink whose outputs should be written
 * @param keyClass class of the RDD keys
 * @param valueClass class of the RDD values
 * @throws IllegalArgumentException if no outputs are registered for the sink
 */
public <K, V> void writeFromRDD(JavaPairRDD<K, V> rdd, JavaSparkExecutionContext sec, String sinkName, Class<K> keyClass, Class<V> valueClass) {
  Set<String> registeredNames = sinkOutputs.get(sinkName);
  boolean hasOutputs = registeredNames != null && !registeredNames.isEmpty();
  if (!hasOutputs) {
    // not expected if validation happened correctly at pipeline configure time
    throw new IllegalArgumentException(
      sinkName + " has no outputs. Please check that the sink calls addOutput at some point.");
  }

  for (String name : registeredNames) {
    OutputFormatProvider provider = outputFormatProviders.get(name);
    if (provider != null) {
      // start from a cleared configuration and copy in only what the provider supplies
      Configuration providerConf = new Configuration();
      providerConf.clear();
      Map<String, String> providerSettings = provider.getOutputFormatConfiguration();
      for (Map.Entry<String, String> setting : providerSettings.entrySet()) {
        providerConf.set(setting.getKey(), setting.getValue());
      }
      providerConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, provider.getOutputFormatClassName());
      rdd.saveAsNewAPIHadoopDataset(providerConf);
    }

    // checked independently of the provider lookup above; both writes happen if both are registered
    DatasetInfo info = datasetInfos.get(name);
    if (info != null) {
      sec.saveAsDataset(rdd, info.getDatasetName(), info.getDatasetArgs());
    }
  }
}
use of co.cask.cdap.api.data.batch.OutputFormatProvider in project cdap by caskdata.
the class ExternalDatasets method makeTrackable.
/**
 * Returns an output that can be tracked.
 *
 * <p>An output that is not backed by an {@link OutputFormatProvider}, or whose provider is itself a
 * {@link Dataset}, is returned unchanged. For any other output, an external dataset with the output's name is
 * created if it does not exist yet, and a dataset output referring to it is returned under the original alias.
 *
 * @param admin {@link Admin} used to look up and create the external dataset
 * @param output output to be tracked
 * @return the same output if it is already trackable, otherwise a dataset output for the external dataset
 * @throws IllegalArgumentException if a dataset with the output's name exists but is not an external dataset
 */
public static Output makeTrackable(Admin admin, Output output) {
  boolean providerBacked = output instanceof Output.OutputFormatProviderOutput;
  if (!providerBacked) {
    // not an external sink; it can be tracked as it is
    return output;
  }

  String datasetName = output.getName();
  OutputFormatProvider provider = ((Output.OutputFormatProviderOutput) output).getOutputFormatProvider();
  Map<String, String> providerConfig = provider.getOutputFormatConfiguration();

  if (provider instanceof Dataset) {
    // a provider that is itself a dataset needs no external dataset to be tracked
    return output;
  }

  // Arguments of the external dataset used for lineage tracking: the output format class plus the
  // provider's configuration.
  Map<String, String> datasetArgs = new HashMap<>();
  datasetArgs.put("output.format.class", provider.getOutputFormatClassName());
  datasetArgs.putAll(providerConfig);

  try {
    if (admin.datasetExists(datasetName)) {
      // an existing dataset of any other type means the name clashes with a regular CDAP Dataset
      String existingType = admin.getDatasetType(datasetName);
      if (!EXTERNAL_DATASET_TYPE.equals(existingType)) {
        throw new IllegalArgumentException(
          "An external sink cannot have the same name as an existing CDAP Dataset instance " + datasetName);
      }
    } else {
      // Note: the dataset properties are the same as the arguments, since the two cannot be told apart
      // inside a single configuration object (CDAP-5674). For the same reason, the properties of the
      // created external dataset will contain runtime arguments.
      admin.createDataset(datasetName, EXTERNAL_DATASET_TYPE, DatasetProperties.of(datasetArgs));
    }
    return Output.ofDataset(datasetName, Collections.unmodifiableMap(datasetArgs)).alias(output.getAlias());
  } catch (DatasetManagementException e) {
    throw Throwables.propagate(e);
  }
}
Aggregations