Use of co.cask.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.
The class MapReduceRuntimeService, method destroy().
/**
 * Calls the destroy method of {@link ProgramLifecycle}.
 */
private void destroy(final boolean succeeded, final String failureInfo) throws Exception {
  // if any exception happens during output committing, we want the MapReduce to fail.
  // for that to happen it is not sufficient to set the status to failed, we have to throw an exception,
  // otherwise the shutdown completes successfully and the completed() callback is called.
  // thus: remember the exception and throw it at the end.
  final AtomicReference<Exception> failureCause = new AtomicReference<>();
  // TODO (CDAP-1952): this should be done in the output committer, to make the M/R fail if addPartition fails
  try {
    context.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext ctxt) throws Exception {
        ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(job.getConfiguration().getClassLoader());
        try {
          for (Map.Entry<String, ProvidedOutput> output : context.getOutputs().entrySet()) {
            commitOutput(succeeded, output.getKey(), output.getValue().getOutputFormatProvider(), failureCause);
            if (succeeded && failureCause.get() != null) {
              // mapreduce was successful but this output committer failed: call onFailure() for all committers
              for (ProvidedOutput toFail : context.getOutputs().values()) {
                commitOutput(false, toFail.getAlias(), toFail.getOutputFormatProvider(), failureCause);
              }
              break;
            }
          }
          // if there was a failure, we must throw an exception to fail the transaction
          // this will roll back all the outputs and also make sure that postCommit() is not called
          // throwing the failure cause: it will be wrapped in a TxFailure and handled in the outer catch()
          Exception cause = failureCause.get();
          if (cause != null) {
            failureCause.set(null);
            throw cause;
          }
        } finally {
          ClassLoaders.setContextClassLoader(oldClassLoader);
        }
      }
    });
  } catch (TransactionFailureException e) {
    LOG.error("Transaction failure when committing dataset outputs", e);
    if (failureCause.get() != null) {
      failureCause.get().addSuppressed(e);
    } else {
      failureCause.set(e);
    }
  }

  final boolean success = succeeded && failureCause.get() == null;
  context.setState(getProgramState(success, failureInfo));

  final TransactionControl txControl = mapReduce instanceof ProgramLifecycle
    ? Transactions.getTransactionControl(TransactionControl.IMPLICIT, MapReduce.class, mapReduce, "destroy")
    : TransactionControl.IMPLICIT;
  try {
    if (TransactionControl.IMPLICIT == txControl) {
      context.execute(new TxRunnable() {
        @Override
        public void run(DatasetContext context) throws Exception {
          doDestroy(success);
        }
      });
    } else {
      doDestroy(success);
    }
  } catch (Throwable e) {
    if (e instanceof TransactionFailureException && e.getCause() != null
        && !(e instanceof TransactionConflictException)) {
      e = e.getCause();
    }
    LOG.warn("Error executing the destroy method of the MapReduce program {}", context.getProgram().getName(), e);
  }

  // this is needed to make the run fail if there was an exception. See comment at beginning of this method
  if (failureCause.get() != null) {
    throw failureCause.get();
  }
}
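
The commitOutput helper called above is not part of this snippet. A plausible sketch, assuming it simply dispatches to DatasetOutputCommitter.onSuccess()/onFailure() and records the first failure in the shared AtomicReference; the actual method in MapReduceRuntimeService may differ in logging and exception wrapping:

// Hypothetical sketch of the commitOutput helper used by destroy(); not the verbatim CDAP implementation.
private void commitOutput(boolean succeeded, String outputName, OutputFormatProvider outputFormatProvider,
                          AtomicReference<Exception> failureCause) {
  if (outputFormatProvider instanceof DatasetOutputCommitter) {
    try {
      if (succeeded && failureCause.get() == null) {
        ((DatasetOutputCommitter) outputFormatProvider).onSuccess();
      } else {
        ((DatasetOutputCommitter) outputFormatProvider).onFailure();
      }
    } catch (Throwable t) {
      LOG.error("Error from {} method of output dataset {}.",
                succeeded ? "onSuccess" : "onFailure", outputName, t);
      // remember only the first failure; destroy() rethrows it after all committers have been notified
      failureCause.compareAndSet(null, t instanceof Exception ? (Exception) t : new Exception(t));
    }
  }
}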
Use of co.cask.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.
The class BasicMapReduceContext, method addOutput().
@Override
public void addOutput(Output output) {
  if (output.getNamespace() != null && output.getNamespace().equals(NamespaceId.SYSTEM.getNamespace())
      && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) {
    // trying to access system namespace from a program outside system namespace is not allowed
    throw new IllegalArgumentException(String.format("Accessing Output %s in system namespace "
                                                       + "is not allowed from the namespace %s",
                                                     output.getName(), getProgram().getNamespaceId()));
  }
  String alias = output.getAlias();
  if (this.outputs.containsKey(alias)) {
    throw new IllegalArgumentException("Output already configured: " + alias);
  }

  ProvidedOutput providedOutput;
  if (output instanceof Output.DatasetOutput) {
    providedOutput = Outputs.transform((Output.DatasetOutput) output, this);
  } else if (output instanceof Output.OutputFormatProviderOutput) {
    OutputFormatProvider outputFormatProvider =
      ((Output.OutputFormatProviderOutput) output).getOutputFormatProvider();
    if (outputFormatProvider instanceof DatasetOutputCommitter) {
      // disallow this, because we would not be able to call its methods in MainOutputCommitter.
      // It needs to be added as a DatasetOutput instead.
throw new IllegalArgumentException("Cannot add a DatasetOutputCommitter as an OutputFormatProviderOutput. " + "Add the output as a DatasetOutput.");
}
providedOutput = new ProvidedOutput(output, outputFormatProvider);
} else {
// shouldn't happen unless user defines their own Output class
throw new IllegalArgumentException(String.format("Output %s has unknown output class %s", output.getName(), output.getClass().getCanonicalName()));
}
this.outputs.put(alias, providedOutput);
}
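
For context, outputs typically reach addOutput() through the MapReduceContext API when the program is configured for submission. A minimal usage sketch, assuming the CDAP 4-style AbstractMapReduce.initialize() (older releases use beforeSubmit(MapReduceContext)); the dataset name and MyOutputFormatProvider are hypothetical, used only for illustration:

// Hypothetical MapReduce program adding both kinds of Output handled by addOutput() above.
public class PurchaseSummaryMapReduce extends AbstractMapReduce {
  @Override
  public void initialize() throws Exception {
    MapReduceContext context = getContext();
    // becomes an Output.DatasetOutput, converted via Outputs.transform(...)
    context.addOutput(Output.ofDataset("purchaseSummaries"));
    // becomes an Output.OutputFormatProviderOutput, wrapped directly into a ProvidedOutput
    context.addOutput(Output.of("rawFiles", new MyOutputFormatProvider()));
  }
}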
Use of co.cask.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.
The class MapReduceContextConfig, method setOutputs().
private void setOutputs(List<ProvidedOutput> providedOutputs) {
  // we only need to serialize the original Output objects, not the entire ProvidedOutput
  List<Output.DatasetOutput> datasetOutputs = new ArrayList<>();
  for (ProvidedOutput providedOutput : providedOutputs) {
    Output output = providedOutput.getOutput();
    if (output instanceof Output.DatasetOutput) {
      datasetOutputs.add((Output.DatasetOutput) output);
    }
  }
  hConf.set(HCONF_ATTR_OUTPUTS, GSON.toJson(datasetOutputs));
}
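
The symmetric read side is not shown in this snippet. A plausible sketch of how MapReduceContextConfig might deserialize the same configuration attribute back into Output.DatasetOutput objects; the type constant and use of getRaw() are assumptions for illustration:

// Hypothetical counterpart to setOutputs(): read the serialized dataset outputs back from the Hadoop conf.
private static final Type OUTPUT_LIST_TYPE = new TypeToken<List<Output.DatasetOutput>>() { }.getType();

List<Output.DatasetOutput> getOutputs() {
  // getRaw() avoids Hadoop's variable expansion on the serialized JSON
  return GSON.fromJson(hConf.getRaw(HCONF_ATTR_OUTPUTS), OUTPUT_LIST_TYPE);
}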
Use of co.cask.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.
The class MapReduceRuntimeService, method setOutputsIfNeeded().
/**
 * Sets the configurations used for outputs.
 */
private void setOutputsIfNeeded(Job job) throws ClassNotFoundException {
  List<ProvidedOutput> outputsMap = context.getOutputs();
  fixOutputPermissions(job, outputsMap);
  LOG.debug("Using as output for MapReduce Job: {}", outputsMap);

  OutputFormatProvider rootOutputFormatProvider;
  if (outputsMap.isEmpty()) {
    // user is not going through our APIs to add output; propagate the job's output format
    rootOutputFormatProvider =
      new BasicOutputFormatProvider(job.getOutputFormatClass().getName(), Collections.<String, String>emptyMap());
  } else if (outputsMap.size() == 1) {
    // If only one output is configured through the context, then set it as the root OutputFormat
    rootOutputFormatProvider = outputsMap.get(0).getOutputFormatProvider();
  } else {
    // multiple output formats configured via the context. We should use a RecordWriter that doesn't support writing
    // as the root output format in this case to disallow writing directly on the context.
    // the OutputCommitter is effectively a no-op, as it runs as the RootOutputCommitter in MultipleOutputsCommitter
    rootOutputFormatProvider =
      new BasicOutputFormatProvider(UnsupportedOutputFormat.class.getName(), Collections.<String, String>emptyMap());
  }

  MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, rootOutputFormatProvider.getOutputFormatClassName(),
                                                       rootOutputFormatProvider.getOutputFormatConfiguration());
  job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

  for (ProvidedOutput output : outputsMap) {
    String outputName = output.getOutput().getAlias();
    String outputFormatClassName = output.getOutputFormatClassName();
    Map<String, String> outputConfig = output.getOutputFormatConfiguration();
    MultipleOutputs.addNamedOutput(job, outputName, outputFormatClassName,
                                   job.getOutputKeyClass(), job.getOutputValueClass(), outputConfig);
  }
}
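
BasicOutputFormatProvider is a private helper of MapReduceRuntimeService that is not shown here. A minimal sketch of what such a provider needs to supply, assuming it is just an immutable holder for an output format class name and its configuration (the field names are assumptions):

// Hypothetical sketch of BasicOutputFormatProvider: an OutputFormatProvider returning fixed values.
private static final class BasicOutputFormatProvider implements OutputFormatProvider {
  private final String outputFormatClassName;
  private final Map<String, String> configuration;

  private BasicOutputFormatProvider(String outputFormatClassName, Map<String, String> configuration) {
    this.outputFormatClassName = outputFormatClassName;
    this.configuration = ImmutableMap.copyOf(configuration);
  }

  @Override
  public String getOutputFormatClassName() {
    return outputFormatClassName;
  }

  @Override
  public Map<String, String> getOutputFormatConfiguration() {
    return configuration;
  }
}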
Use of co.cask.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.
The class MapReduceRuntimeService, method fixOutputPermissions().
private void fixOutputPermissions(Job job, Map<String, ProvidedOutput> outputs) {
  Configuration jobconf = job.getConfiguration();
  Set<String> outputsWithUmask = new HashSet<>();
  Set<String> outputUmasks = new HashSet<>();
  for (Map.Entry<String, ProvidedOutput> entry : outputs.entrySet()) {
    String umask = entry.getValue().getOutputFormatConfiguration().get(HADOOP_UMASK_PROPERTY);
    if (umask != null) {
      outputsWithUmask.add(entry.getKey());
      outputUmasks.add(umask);
    }
  }
  boolean allOutputsHaveUmask = outputsWithUmask.size() == outputs.size();
  boolean allOutputsAgree = outputUmasks.size() == 1;
  boolean jobConfHasUmask = isProgrammaticConfig(jobconf, HADOOP_UMASK_PROPERTY);
  String jobConfUmask = jobconf.get(HADOOP_UMASK_PROPERTY);

  boolean mustFixUmasks = false;
  if (jobConfHasUmask) {
    // case 1: job conf has a programmatic umask. It prevails.
    mustFixUmasks = !outputsWithUmask.isEmpty();
    if (mustFixUmasks) {
      LOG.info("Overriding permissions of outputs {} because a umask of {} was set programmatically in the job "
                 + "configuration.", outputsWithUmask, jobConfUmask);
    }
  } else if (allOutputsHaveUmask && allOutputsAgree) {
    // case 2: no programmatic umask in job conf, all outputs want the same umask: set it in job conf
    String umaskToUse = outputUmasks.iterator().next();
    jobconf.set(HADOOP_UMASK_PROPERTY, umaskToUse);
    LOG.debug("Setting umask of {} in job configuration because all outputs {} agree on it.",
              umaskToUse, outputsWithUmask);
  } else {
    // case 3: some outputs configure a umask, but not all of them, or not all the same: use job conf default
    mustFixUmasks = !outputsWithUmask.isEmpty();
    if (mustFixUmasks) {
      LOG.warn("Overriding permissions of outputs {} because they configure different permissions. Falling back "
                 + "to default umask of {} in job configuration.", outputsWithUmask, jobConfUmask);
    }
  }

  // fix all output configurations that have a umask by removing that property from their configs
  if (mustFixUmasks) {
    for (String outputName : outputsWithUmask) {
      ProvidedOutput output = outputs.get(outputName);
      Map<String, String> outputConfig = new HashMap<>(output.getOutputFormatConfiguration());
      outputConfig.remove(HADOOP_UMASK_PROPERTY);
      outputs.put(outputName, new ProvidedOutput(output.getAlias(), output.getOutputFormatProvider(),
                                                 output.getOutputFormatClassName(), outputConfig));
    }
  }
}
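
The isProgrammaticConfig check above is also a helper outside this snippet, and HADOOP_UMASK_PROPERTY presumably refers to Hadoop's fs.permissions.umask-mode setting. A plausible sketch of the check, assuming it relies on Hadoop's Configuration.getPropertySources() to tell whether a value was set via Configuration.set() rather than loaded from a resource file; the exact source label is an assumption:

// Hypothetical sketch: Hadoop's Configuration tracks where each property value came from.
// Values set through Configuration.set(name, value) are assumed here to be recorded with the
// "programatically" source label (Hadoop's own spelling); verify against the Hadoop version in use.
private static boolean isProgrammaticConfig(Configuration conf, String name) {
  String[] sources = conf.getPropertySources(name);
  return sources != null && sources.length > 0 && "programatically".equals(sources[sources.length - 1]);
}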