Search in sources:

Example 6 with Dataset

use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.

the class ExploreExecutorHttpHandler method doPartitionOperation.

/**
 * Runs the given partition operation against a dataset and reports the outcome through the responder.
 *
 * <p>The request body is parsed as a JSON object of string properties. The partition key is derived from
 * those properties together with the dataset's partitioning. Responses sent by this method:
 * <ul>
 *   <li>500 if the dataset cannot be instantiated, or if any other exception escapes the processing;</li>
 *   <li>400 if the dataset is not a {@code PartitionedFileSet}, or the partition key is invalid or absent;</li>
 *   <li>200 with a JSON body containing the query handle when the operation returns a handle.</li>
 * </ul>
 * When {@code submitOperation} returns {@code null}, nothing is sent from here.
 * NOTE(review): presumably the operation has already responded in that case - confirm against implementations.
 *
 * @param request the HTTP request whose content carries the partition properties as JSON
 * @param responder used to send the HTTP response
 * @param datasetId the dataset to operate on
 * @param partitionOperation the operation to submit once the partition key has been resolved
 */
private void doPartitionOperation(FullHttpRequest request, HttpResponder responder, DatasetId datasetId, PartitionOperation partitionOperation) {
    try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
        Dataset dataset;
        try {
            dataset = datasetInstantiator.getDataset(datasetId);
        } catch (Exception e) {
            LOG.error("Exception instantiating dataset {}.", datasetId, e);
            responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId);
            return;
        }
        try {
            if (!(dataset instanceof PartitionedFileSet)) {
                responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
                return;
            }
            Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();
            // Decode the body explicitly as UTF-8: the single-argument InputStreamReader constructor falls back
            // to the platform default charset, which corrupts non-ASCII property values on non-UTF-8 hosts.
            Reader reader = new InputStreamReader(new ByteBufInputStream(request.content()), java.nio.charset.StandardCharsets.UTF_8);
            Map<String, String> properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() {
            }.getType());
            PartitionKey partitionKey;
            try {
                partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
            } catch (Exception e) {
                // Any failure while deriving the key is reported to the client as a bad request.
                responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
                return;
            }
            if (partitionKey == null) {
                responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
                return;
            }
            QueryHandle handle = partitionOperation.submitOperation(partitionKey, properties);
            if (handle == null) {
                return;
            }
            JsonObject json = new JsonObject();
            json.addProperty("handle", handle.getHandle());
            responder.sendJson(HttpResponseStatus.OK, json.toString());
        } finally {
            // The dataset is closed on every exit path once it has been instantiated.
            Closeables.closeQuietly(dataset);
        }
    } catch (Throwable e) {
        LOG.error("Got exception:", e);
        responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) Dataset(io.cdap.cdap.api.dataset.Dataset) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) JsonObject(com.google.gson.JsonObject) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) ByteBufInputStream(io.netty.buffer.ByteBufInputStream) ExploreException(io.cdap.cdap.explore.service.ExploreException) UnsupportedTypeException(io.cdap.cdap.api.data.schema.UnsupportedTypeException) UnauthorizedException(io.cdap.cdap.security.spi.authorization.UnauthorizedException) SQLException(java.sql.SQLException) JsonSyntaxException(com.google.gson.JsonSyntaxException) DatasetManagementException(io.cdap.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) BadRequestException(io.cdap.cdap.common.BadRequestException) Partitioning(io.cdap.cdap.api.dataset.lib.Partitioning) SystemDatasetInstantiator(io.cdap.cdap.data.dataset.SystemDatasetInstantiator) TypeToken(com.google.common.reflect.TypeToken) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) QueryHandle(io.cdap.cdap.proto.QueryHandle)

Example 7 with Dataset

use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.

the class DatasetModulesDeployer method loadAndDeployModule.

/**
 * Loads {@code className} from the artifact class loader and adds it as a dataset module.
 *
 * <p>A class assignable to {@code DatasetModule} is instantiated directly. A class assignable to
 * {@code Dataset} is wrapped in a {@code SingleTypeModule}, unless the system dataset framework already has
 * a type of that name, or the type already exists and unchecked upgrades are not allowed; in both of those
 * cases the method returns without adding anything. Type lookup and module addition run through
 * {@code AuthorizationUtil.authorizeAs} with the given user.
 *
 * @param artifactClassLoader class loader used to load the module class
 * @param className fully qualified name of the class to load
 * @param jarLocation location of the jar, passed on when adding the module
 * @param moduleName name used to build the module id
 * @param namespaceId namespace in which the module (and dataset type) ids are created
 * @param authorizingUser user passed to {@code AuthorizationUtil.authorizeAs}
 * @throws IllegalArgumentException if the class is neither a {@code DatasetModule} nor a {@code Dataset}
 * @throws Exception on any other failure; a {@code ModuleConflictException} is logged and rethrown
 */
private void loadAndDeployModule(ClassLoader artifactClassLoader, String className, final Location jarLocation, String moduleName, NamespaceId namespaceId, String authorizingUser) throws Exception {
    // note: using app class loader to load module class
    @SuppressWarnings("unchecked") Class<Dataset> loadedClass = (Class<Dataset>) artifactClassLoader.loadClass(className);
    final String loadedClassName = loadedClass.getName();
    try {
        // note: we can deploy module or create module from Dataset class
        // note: it seems dangerous to instantiate dataset module here, but this will be fine when we move deploy into
        // isolated user's environment (e.g. separate yarn container)
        final DatasetModuleId targetModuleId = namespaceId.datasetModule(moduleName);
        final boolean isModuleClass = DatasetModule.class.isAssignableFrom(loadedClass);
        if (!isModuleClass && !Dataset.class.isAssignableFrom(loadedClass)) {
            throw new IllegalArgumentException(String.format("Cannot use class %s to add dataset module: it must be of type DatasetModule or Dataset", loadedClassName));
        }
        final DatasetModule moduleToAdd;
        if (isModuleClass) {
            moduleToAdd = (DatasetModule) loadedClass.newInstance();
        } else {
            // Types already provided by the system framework are never redeployed.
            if (systemDatasetFramework.hasSystemType(loadedClassName)) {
                return;
            }
            final DatasetTypeId typeId = namespaceId.datasetType(loadedClassName);
            Callable<Boolean> typeLookup = new Callable<Boolean>() {

                @Override
                public Boolean call() throws Exception {
                    return datasetFramework.hasType(typeId);
                }
            };
            boolean alreadyDeployed = AuthorizationUtil.authorizeAs(authorizingUser, typeLookup);
            // An existing type is only replaced when unchecked upgrades are allowed.
            if (alreadyDeployed && !allowDatasetUncheckedUpgrade) {
                return;
            }
            moduleToAdd = new SingleTypeModule(loadedClass);
        }
        LOG.info("Adding module: {}", loadedClassName);
        Callable<Void> addModule = new Callable<Void>() {

            @Override
            public Void call() throws Exception {
                datasetFramework.addModule(targetModuleId, moduleToAdd, jarLocation);
                return null;
            }
        };
        AuthorizationUtil.authorizeAs(authorizingUser, addModule);
    } catch (ModuleConflictException conflict) {
        LOG.info("Conflict while deploying module {}: {}", moduleName, conflict.getMessage());
        throw conflict;
    }
}
Also used : DatasetTypeId(io.cdap.cdap.proto.id.DatasetTypeId) Dataset(io.cdap.cdap.api.dataset.Dataset) DatasetModule(io.cdap.cdap.api.dataset.module.DatasetModule) Callable(java.util.concurrent.Callable) DatasetManagementException(io.cdap.cdap.api.dataset.DatasetManagementException) ModuleConflictException(io.cdap.cdap.data2.dataset2.ModuleConflictException) DatasetModuleId(io.cdap.cdap.proto.id.DatasetModuleId) ModuleConflictException(io.cdap.cdap.data2.dataset2.ModuleConflictException) SingleTypeModule(io.cdap.cdap.data2.dataset2.SingleTypeModule)

Example 8 with Dataset

use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.

the class SingleThreadDatasetCache method createDatasetInstance.

/**
 * Instantiates the dataset identified by the namespace and name of the given cache key, using the key's
 * arguments and access type.
 *
 * <p>If the dataset is a {@code MeteredDataset} and a metrics context is available, the dataset is given a
 * child metrics context tagged with the dataset name. Lineage is written only when requested.
 *
 * @param key cache key carrying namespace, name, arguments and access type
 * @param recordLineage whether to write lineage for this dataset access
 * @return the newly instantiated dataset
 */
private Dataset createDatasetInstance(DatasetCacheKey key, boolean recordLineage) {
    DatasetId id = new DatasetId(key.getNamespace(), key.getName());
    Dataset instance = instantiator.getDataset(id, key.getArguments(), key.getAccessType());
    if (metricsContext != null && instance instanceof MeteredDataset) {
        MeteredDataset metered = (MeteredDataset) instance;
        metered.setMetricsCollector(metricsContext.childContext(Constants.Metrics.Tag.DATASET, key.getName()));
    }
    if (recordLineage) {
        instantiator.writeLineage(id, key.getAccessType());
    }
    return instance;
}
Also used : Dataset(io.cdap.cdap.api.dataset.Dataset) MeteredDataset(io.cdap.cdap.api.dataset.metrics.MeteredDataset) MeteredDataset(io.cdap.cdap.api.dataset.metrics.MeteredDataset) DatasetId(io.cdap.cdap.proto.id.DatasetId)

Example 9 with Dataset

use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.

the class SingleTypeModule method register.

/**
 * Registers a composite dataset definition for the wrapped dataset class.
 *
 * <p>The type name comes from the class's {@code DatasetType} annotation, falling back to the class name.
 * Every constructor parameter after the first is expected to carry an {@code EmbeddedDataset} annotation;
 * its definition is looked up in the registry and passed to the composite definition under the annotation's
 * value. When a dataset is requested, the constructor is invoked with the specification followed by the
 * embedded datasets in parameter order.
 *
 * @param registry registry used to resolve embedded definitions and to add the new definition
 * @throws IllegalStateException if an embedded dataset type is not known to the registry
 */
@Override
public void register(DatasetDefinitionRegistry registry) {
    final Constructor ctor = findSuitableCtorOrFail(dataSetClass);
    DatasetType typeAnnotation = dataSetClass.getAnnotation(DatasetType.class);
    // default type name to dataset class name
    final String typeName;
    if (typeAnnotation == null) {
        typeName = dataSetClass.getName();
    } else {
        typeName = typeAnnotation.value();
    }
    // The ordering is important. It is the same order as the parameters
    final Map<String, DatasetDefinition> embeddedDefinitions = Maps.newLinkedHashMap();
    final Class<?>[] parameterTypes = ctor.getParameterTypes();
    Annotation[][] parameterAnnotations = ctor.getParameterAnnotations();
    // Gather all dataset name and type information for the @EmbeddedDataset parameters;
    // index 0 is skipped because the first constructor argument is the specification itself
    for (int paramIndex = 1; paramIndex < parameterTypes.length; paramIndex++) {
        // Must have the EmbeddedDataset as it's the contract of the findSuitableCtorOrFail method
        Iterable<EmbeddedDataset> embeddedAnnotations = Iterables.filter(Arrays.asList(parameterAnnotations[paramIndex]), EmbeddedDataset.class);
        EmbeddedDataset embedded = embeddedAnnotations.iterator().next();
        String declaredType = embedded.type();
        // default to dataset class name if dataset type name is not specified through the annotation
        String embeddedType = EmbeddedDataset.DEFAULT_TYPE_NAME.equals(declaredType) ? parameterTypes[paramIndex].getName() : declaredType;
        DatasetDefinition embeddedDefinition = registry.get(embeddedType);
        if (embeddedDefinition == null) {
            throw new IllegalStateException(String.format("Unknown Dataset type '%s', specified by parameter number %d of the %s Dataset", embeddedType, paramIndex, dataSetClass.getName()));
        }
        embeddedDefinitions.put(embedded.value(), embeddedDefinition);
    }
    registry.add(new CompositeDatasetDefinition<Dataset>(typeName, embeddedDefinitions) {

        @Override
        public Dataset getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
            // Constructor arguments: the specification first, then one embedded dataset per definition
            Object[] ctorArgs = new Object[embeddedDefinitions.size() + 1];
            ctorArgs[0] = spec;
            int nextArg = 1;
            for (Map.Entry<String, DatasetDefinition> embeddedEntry : embeddedDefinitions.entrySet()) {
                ctorArgs[nextArg] = embeddedEntry.getValue().getDataset(datasetContext, spec.getSpecification(embeddedEntry.getKey()), arguments, classLoader);
                nextArg++;
            }
            try {
                return (Dataset) ctor.newInstance(ctorArgs);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    });
}
Also used : EmbeddedDataset(io.cdap.cdap.api.dataset.module.EmbeddedDataset) Constructor(java.lang.reflect.Constructor) EmbeddedDataset(io.cdap.cdap.api.dataset.module.EmbeddedDataset) Dataset(io.cdap.cdap.api.dataset.Dataset) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) DatasetType(io.cdap.cdap.api.dataset.module.DatasetType) IOException(java.io.IOException) CompositeDatasetDefinition(io.cdap.cdap.api.dataset.lib.CompositeDatasetDefinition) DatasetDefinition(io.cdap.cdap.api.dataset.DatasetDefinition) IOException(java.io.IOException) ArrayList(java.util.ArrayList) List(java.util.List) DatasetContext(io.cdap.cdap.api.dataset.DatasetContext)

Example 10 with Dataset

use of io.cdap.cdap.api.dataset.Dataset in project cdap by caskdata.

the class BasicMapReduceContext method createInput.

/**
 * Creates an input backed by a {@code DatasetInputFormatProvider} for the given dataset input.
 *
 * <p>The dataset is obtained with read access, from the input's namespace when one is set and through the
 * namespace-less lookup otherwise. The returned input carries the alias of the original dataset input.
 *
 * @param datasetInput the dataset input describing name, namespace, arguments, splits and alias
 * @return the input wrapping the dataset's input format provider
 */
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) {
    String name = datasetInput.getName();
    Map<String, String> arguments = datasetInput.getArguments();
    // remember the alias of the incoming input so it can be applied to the Input created below
    String alias = datasetInput.getAlias();
    Dataset dataset;
    if (datasetInput.getNamespace() != null) {
        dataset = getDataset(datasetInput.getNamespace(), name, arguments, AccessType.READ);
    } else {
        dataset = getDataset(name, arguments, AccessType.READ);
    }
    DatasetInputFormatProvider provider = new DatasetInputFormatProvider(datasetInput.getNamespace(), name, arguments, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class);
    return (Input.InputFormatProviderInput) Input.of(name, provider).alias(alias);
}
Also used : DatasetInputFormatProvider(io.cdap.cdap.internal.app.runtime.batch.dataset.DatasetInputFormatProvider) Dataset(io.cdap.cdap.api.dataset.Dataset)

Aggregations

Dataset (io.cdap.cdap.api.dataset.Dataset): 40, IOException (java.io.IOException): 20, DatasetInstantiationException (io.cdap.cdap.api.data.DatasetInstantiationException): 10, DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException): 10, SystemDatasetInstantiator (io.cdap.cdap.data.dataset.SystemDatasetInstantiator): 8, UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException): 8, UnsupportedTypeException (io.cdap.cdap.api.data.schema.UnsupportedTypeException): 4, DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification): 4, PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 4, MeteredDataset (io.cdap.cdap.api.dataset.metrics.MeteredDataset): 4, TopicNotFoundException (io.cdap.cdap.api.messaging.TopicNotFoundException): 4, BadRequestException (io.cdap.cdap.common.BadRequestException): 4, CustomDatasetApp (io.cdap.cdap.data2.dataset2.customds.CustomDatasetApp): 4, CustomOperations (io.cdap.cdap.data2.dataset2.customds.CustomOperations): 4, DefaultTopLevelExtendsDataset (io.cdap.cdap.data2.dataset2.customds.DefaultTopLevelExtendsDataset): 4, DelegatingDataset (io.cdap.cdap.data2.dataset2.customds.DelegatingDataset): 4, TopLevelDataset (io.cdap.cdap.data2.dataset2.customds.TopLevelDataset): 4, TopLevelDirectDataset (io.cdap.cdap.data2.dataset2.customds.TopLevelDirectDataset): 4, TopLevelExtendsDataset (io.cdap.cdap.data2.dataset2.customds.TopLevelExtendsDataset): 4, ByteCodeClassLoader (io.cdap.cdap.internal.asm.ByteCodeClassLoader): 4