use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.
the class ExploreExecutorHttpHandler method doPartitionOperation.
/**
 * Runs a partition operation against a partitioned dataset on behalf of an HTTP request.
 *
 * <p>The request body is read as a JSON object of string properties, from which the partition key is
 * derived using the dataset's partitioning. The key and the properties are then handed to
 * {@code partitionOperation}; when it yields a query handle, the handle is returned to the client as
 * {@code {"handle": ...}} with status 200.
 *
 * <p>Responses sent by this method:
 * <ul>
 *   <li>500 if the dataset cannot be instantiated, or on any other unexpected failure;</li>
 *   <li>400 if the dataset is not a {@code PartitionedFileSet}, if the body is empty, or if the
 *       partition key is invalid or missing.</li>
 * </ul>
 *
 * @param request the HTTP request whose content carries the JSON properties
 * @param responder used to send the response
 * @param datasetId the dataset to operate on
 * @param partitionOperation the operation to submit for the resolved partition key
 */
private void doPartitionOperation(FullHttpRequest request, HttpResponder responder, DatasetId datasetId, PartitionOperation partitionOperation) {
  try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
    Dataset dataset;
    try {
      dataset = datasetInstantiator.getDataset(datasetId);
    } catch (Exception e) {
      LOG.error("Exception instantiating dataset {}.", datasetId, e);
      responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId);
      return;
    }
    try {
      if (!(dataset instanceof PartitionedFileSet)) {
        responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
        return;
      }
      Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();
      // Decode the body explicitly as UTF-8 (the encoding JSON is exchanged in) instead of relying on
      // the platform default charset, which would corrupt non-ASCII values on non-UTF-8 hosts.
      Reader reader = new InputStreamReader(new ByteBufInputStream(request.content()),
                                            java.nio.charset.StandardCharsets.UTF_8);
      Map<String, String> properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() {
      }.getType());
      if (properties == null) {
        // Gson returns null when the input is at EOF, i.e. the request had no body at all.
        responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
        return;
      }
      PartitionKey partitionKey;
      try {
        partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
      } catch (Exception e) {
        responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
        return;
      }
      if (partitionKey == null) {
        responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
        return;
      }
      QueryHandle handle = partitionOperation.submitOperation(partitionKey, properties);
      if (handle == null) {
        // NOTE(review): no response is sent here; presumably the operation has already responded
        // when it returns no handle - confirm against the PartitionOperation implementations.
        return;
      }
      JsonObject json = new JsonObject();
      json.addProperty("handle", handle.getHandle());
      responder.sendJson(HttpResponseStatus.OK, json.toString());
    } finally {
      Closeables.closeQuietly(dataset);
    }
  } catch (Throwable e) {
    LOG.error("Got exception:", e);
    // Some throwables carry no message; fall back to their string form so the body is never null.
    String message = e.getMessage();
    responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, message != null ? message : e.toString());
  }
}
use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.
the class DatasetModulesDeployer method loadAndDeployModule.
/**
 * Loads the named class through the artifact class loader and deploys it as a dataset module.
 *
 * <p>The class may either be a {@code DatasetModule}, which is instantiated directly, or a
 * {@code Dataset}, which is wrapped in a {@code SingleTypeModule}. A {@code Dataset} class is skipped
 * when its type is already a system type, or when the type already exists and unchecked upgrades are
 * not allowed.
 *
 * @param artifactClassLoader class loader used to load the module class
 * @param className fully qualified name of the class to load
 * @param jarLocation location of the jar passed along when adding the module
 * @param moduleName name under which the module is added
 * @param namespaceId namespace that owns the module
 * @param authorizingUser user on whose behalf the dataset framework calls are made
 * @throws IllegalArgumentException if the class is neither a {@code DatasetModule} nor a {@code Dataset}
 * @throws Exception on any failure to load, instantiate or add the module; a
 *         {@code ModuleConflictException} is logged and rethrown
 */
private void loadAndDeployModule(ClassLoader artifactClassLoader, String className, final Location jarLocation, String moduleName, NamespaceId namespaceId, String authorizingUser) throws Exception {
  // note: the module class is loaded with the app (artifact) class loader
  @SuppressWarnings("unchecked") Class<Dataset> loadedClass = (Class<Dataset>) artifactClassLoader.loadClass(className);
  try {
    // note: a module can be deployed as-is, or created from a Dataset class
    // note: instantiating the dataset module here seems dangerous, but this will be fine once deploy moves into
    // an isolated user's environment (e.g. separate yarn container)
    final DatasetModuleId moduleId = namespaceId.datasetModule(moduleName);
    final DatasetModule moduleToAdd;
    if (DatasetModule.class.isAssignableFrom(loadedClass)) {
      moduleToAdd = (DatasetModule) loadedClass.newInstance();
    } else {
      if (!Dataset.class.isAssignableFrom(loadedClass)) {
        throw new IllegalArgumentException(String.format("Cannot use class %s to add dataset module: it must be of type DatasetModule or Dataset", loadedClass.getName()));
      }
      // System types are already available; nothing to deploy.
      if (systemDatasetFramework.hasSystemType(loadedClass.getName())) {
        return;
      }
      final DatasetTypeId typeId = namespaceId.datasetType(loadedClass.getName());
      Callable<Boolean> typeLookup = new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return datasetFramework.hasType(typeId);
        }
      };
      boolean typeAlreadyExists = AuthorizationUtil.authorizeAs(authorizingUser, typeLookup);
      // An existing type is only re-deployed when unchecked upgrades are allowed.
      if (typeAlreadyExists && !allowDatasetUncheckedUpgrade) {
        return;
      }
      moduleToAdd = new SingleTypeModule(loadedClass);
    }

    LOG.info("Adding module: {}", loadedClass.getName());
    Callable<Void> addModuleCall = new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        datasetFramework.addModule(moduleId, moduleToAdd, jarLocation);
        return null;
      }
    };
    AuthorizationUtil.authorizeAs(authorizingUser, addModuleCall);
  } catch (ModuleConflictException conflict) {
    LOG.info("Conflict while deploying module {}: {}", moduleName, conflict.getMessage());
    throw conflict;
  }
}
use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.
the class SingleThreadDatasetCache method createDatasetInstance.
/**
 * Instantiates the dataset described by the given cache key.
 *
 * <p>If the new instance is a {@code MeteredDataset} and a metrics context is available, the instance
 * is given a child metrics context tagged with the dataset name. Lineage is written for the dataset
 * and access type only when requested.
 *
 * @param key identifies the dataset (namespace, name, arguments and access type)
 * @param recordLineage whether to write lineage for this access
 * @return the newly created dataset instance
 */
private Dataset createDatasetInstance(DatasetCacheKey key, boolean recordLineage) {
  DatasetId id = new DatasetId(key.getNamespace(), key.getName());
  Dataset instance = instantiator.getDataset(id, key.getArguments(), key.getAccessType());

  if (metricsContext != null && instance instanceof MeteredDataset) {
    MeteredDataset metered = (MeteredDataset) instance;
    metered.setMetricsCollector(metricsContext.childContext(Constants.Metrics.Tag.DATASET, key.getName()));
  }

  if (!recordLineage) {
    return instance;
  }
  instantiator.writeLineage(id, key.getAccessType());
  return instance;
}
use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.
the class SingleTypeModule method register.
/**
 * Registers a composite dataset definition for the wrapped dataset class.
 *
 * <p>The type name is taken from the class's {@code DatasetType} annotation when present, otherwise
 * the class name is used. Every constructor parameter after the first must carry an
 * {@code EmbeddedDataset} annotation; its definition is looked up in the registry and later used to
 * create the corresponding constructor argument.
 *
 * @param registry the registry to look embedded definitions up in and to add the new definition to
 * @throws IllegalStateException if an embedded dataset type is not known to the registry
 */
@Override
public void register(DatasetDefinitionRegistry registry) {
  final Constructor constructor = findSuitableCtorOrFail(dataSetClass);

  // The type name defaults to the dataset class name when no annotation is present.
  final String resolvedTypeName;
  DatasetType typeAnnotation = dataSetClass.getAnnotation(DatasetType.class);
  if (typeAnnotation == null) {
    resolvedTypeName = dataSetClass.getName();
  } else {
    resolvedTypeName = typeAnnotation.value();
  }

  // Insertion order matters: it mirrors the order of the constructor parameters.
  final Map<String, DatasetDefinition> embeddedDefinitions = Maps.newLinkedHashMap();
  final Class<?>[] parameterTypes = constructor.getParameterTypes();
  Annotation[][] parameterAnnotations = constructor.getParameterAnnotations();

  // Collect name and type of each @EmbeddedDataset parameter; index 0 is skipped because the first
  // constructor argument is the dataset specification (see the argument assembly below).
  for (int position = 1; position < parameterTypes.length; position++) {
    // findSuitableCtorOrFail guarantees the EmbeddedDataset annotation is present.
    Iterable<EmbeddedDataset> embeddedAnnotations =
      Iterables.filter(Arrays.asList(parameterAnnotations[position]), EmbeddedDataset.class);
    EmbeddedDataset embedded = embeddedAnnotations.iterator().next();

    // Fall back to the parameter's class name when the annotation does not name a type.
    String declaredType = embedded.type();
    String embeddedType = EmbeddedDataset.DEFAULT_TYPE_NAME.equals(declaredType)
      ? parameterTypes[position].getName()
      : declaredType;

    DatasetDefinition resolvedDefinition = registry.get(embeddedType);
    if (resolvedDefinition == null) {
      throw new IllegalStateException(String.format("Unknown Dataset type '%s', specified by parameter number %d of the %s Dataset", embeddedType, position, dataSetClass.getName()));
    }
    embeddedDefinitions.put(embedded.value(), resolvedDefinition);
  }

  CompositeDatasetDefinition<Dataset> compositeDefinition = new CompositeDatasetDefinition<Dataset>(resolvedTypeName, embeddedDefinitions) {
    @Override
    public Dataset getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
      // Constructor arguments: the specification first, then one dataset per embedded definition.
      Object[] constructorArgs = new Object[embeddedDefinitions.size() + 1];
      constructorArgs[0] = spec;
      int next = 1;
      for (Map.Entry<String, DatasetDefinition> embeddedEntry : embeddedDefinitions.entrySet()) {
        DatasetSpecification embeddedSpec = spec.getSpecification(embeddedEntry.getKey());
        constructorArgs[next++] = embeddedEntry.getValue().getDataset(datasetContext, embeddedSpec, arguments, classLoader);
      }
      try {
        return (Dataset) constructor.newInstance(constructorArgs);
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }
  };
  registry.add(compositeDefinition);
}
use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.
the class BasicMapReduceContext method createInput.
/**
 * Turns a dataset input into an input backed by a {@code DatasetInputFormatProvider}.
 *
 * <p>The dataset is obtained for read access, from the input's namespace when one is set and through
 * the namespace-less lookup otherwise. The alias of the given input is carried over to the result.
 *
 * @param datasetInput the dataset input to convert
 * @return an input-format-provider input with the same alias as {@code datasetInput}
 */
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) {
  String name = datasetInput.getName();
  Map<String, String> arguments = datasetInput.getArguments();
  // Remember the caller's alias so it can be re-applied to the Input created below.
  String alias = datasetInput.getAlias();

  Dataset readDataset;
  if (datasetInput.getNamespace() != null) {
    readDataset = getDataset(datasetInput.getNamespace(), name, arguments, AccessType.READ);
  } else {
    readDataset = getDataset(name, arguments, AccessType.READ);
  }

  DatasetInputFormatProvider provider = new DatasetInputFormatProvider(
    datasetInput.getNamespace(),
    name,
    arguments,
    readDataset,
    datasetInput.getSplits(),
    MapReduceBatchReadableInputFormat.class);
  return (Input.InputFormatProviderInput) Input.of(name, provider).alias(alias);
}
Aggregations