Search in sources :

Example 1 with Dataset

use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.

the class BasicMapReduceTaskContext method getBatchReadable.

  /**
   * Returns a {@link BatchReadable} that reads data from the given dataset.
   */
// Looks up the named dataset for READ access and wraps it in an anonymous BatchReadable
// that forwards to it.
// NOTE(review): this excerpt is truncated - several closing braces and the bodies of the
// finally blocks are missing, so the comments below describe only the visible statements.
<K, V> BatchReadable<K, V> getBatchReadable(@Nullable String datasetNamespace, String datasetName, Map<String, String> datasetArgs) {
    Dataset dataset;
    // A null namespace selects the getDataset overload that takes no namespace argument.
    if (datasetNamespace == null) {
        dataset = getDataset(datasetName, datasetArgs, AccessType.READ);
    } else {
        dataset = getDataset(datasetNamespace, datasetName, datasetArgs, AccessType.READ);
    // Must be BatchReadable.
    Preconditions.checkArgument(dataset instanceof BatchReadable, "Dataset '%s' is not a BatchReadable.", datasetName);
    // The instanceof check above guards the raw type; the K/V type arguments remain unchecked.
    @SuppressWarnings("unchecked") final BatchReadable<K, V> delegate = (BatchReadable<K, V>) dataset;
    return new BatchReadable<K, V>() {

        // Forwards to the delegate; any Exception caught is rethrown via Throwables.propagate.
        public List<Split> getSplits() {
            try {
                try {
                    return delegate.getSplits();
                // NOTE(review): the body of this finally block is not present in the excerpt.
                } finally {
            } catch (Exception e) {
                throw Throwables.propagate(e);

        // Wraps the delegate's split reader in a ForwardingSplitReader whose close() is overridden.
        public SplitReader<K, V> createSplitReader(Split split) {
            return new ForwardingSplitReader<K, V>(delegate.createSplitReader(split)) {

                // NOTE(review): both the try and finally bodies are empty in this excerpt, so what
                // close() actually does cannot be determined from here.
                public void close() {
                    try {
                        try {
                        } finally {
                    } catch (Exception e) {
                        throw Throwables.propagate(e);
Also used : ForwardingSplitReader( Dataset(co.cask.cdap.api.dataset.Dataset) BatchReadable( Split( DatasetInstantiationException( FileNotFoundException( TopicNotFoundException(co.cask.cdap.api.messaging.TopicNotFoundException) IOException(

Example 2 with Dataset

use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.

the class BasicMapReduceTaskContext method getBatchWritable.

  /**
   * Returns a {@link CloseableBatchWritable} that writes data to the given dataset.
   */
// Looks up the named dataset for WRITE access and wraps it in an anonymous
// CloseableBatchWritable whose write() forwards to the dataset.
// NOTE(review): this excerpt is truncated - closing braces and the body of the try block in
// close() are missing, so the comments below describe only the visible statements.
<K, V> CloseableBatchWritable<K, V> getBatchWritable(String namespace, String datasetName, Map<String, String> datasetArgs) {
    Dataset dataset = getDataset(namespace, datasetName, datasetArgs, AccessType.WRITE);
    // Must be BatchWritable.
    Preconditions.checkArgument(dataset instanceof BatchWritable, "Dataset '%s:%s' is not a BatchWritable.", namespace, datasetName);
    // The instanceof check above guards the raw type; the K/V type arguments remain unchecked.
    @SuppressWarnings("unchecked") final BatchWritable<K, V> delegate = (BatchWritable<K, V>) dataset;
    return new CloseableBatchWritable<K, V>() {

        // Passes each key/value pair straight through to the underlying dataset.
        public void write(K k, V v) {
            delegate.write(k, v);

        public void close() throws IOException {
            // NOTE(review): the try body is empty in this excerpt; the call that can throw is missing.
            try {
            } catch (Exception e) {
                // An IOException is rethrown as-is; any other exception is wrapped in an IOException.
                Throwables.propagateIfInstanceOf(e, IOException.class);
                throw new IOException(e);
Also used : Dataset(co.cask.cdap.api.dataset.Dataset) IOException( CloseableBatchWritable( DatasetInstantiationException( FileNotFoundException( TopicNotFoundException(co.cask.cdap.api.messaging.TopicNotFoundException) IOException( CloseableBatchWritable( BatchWritable(

Example 3 with Dataset

use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.

the class AuthorizationBootstrapperTest method test.

// Exercises authorization bootstrap: waits for privileges to be granted, for the default
// namespace to exist, then creates a system dataset and namespaces.
// NOTE(review): this excerpt is truncated - closing braces, assertions and some calls appear
// to have been stripped, so the comments below describe only the visible statements.
public void test() throws Exception {
    // The system user is the short name of the current Hadoop UGI user.
    final Principal systemUser = new Principal(UserGroupInformation.getCurrentUser().getShortUserName(), Principal.PrincipalType.USER);
    // initial state: no privileges for system or admin users
    // NOTE(review): these two filters are never used in the visible code; the checks that used
    // them are presumably missing from the excerpt.
    Predicate<EntityId> systemUserFilter = authorizationEnforcer.createFilter(systemUser);
    Predicate<EntityId> adminUserFilter = authorizationEnforcer.createFilter(ADMIN_USER);
    // privileges should be granted after running bootstrap;
    Tasks.waitFor(true, new Callable<Boolean>() {

        public Boolean call() throws Exception {
            // Filters are re-created on every poll; these locals shadow the outer ones of the same name.
            Predicate<EntityId> systemUserFilter = authorizationEnforcer.createFilter(systemUser);
            Predicate<EntityId> adminUserFilter = authorizationEnforcer.createFilter(ADMIN_USER);
            return systemUserFilter.apply(instanceId) && systemUserFilter.apply(NamespaceId.SYSTEM) && adminUserFilter.apply(NamespaceId.DEFAULT);
    }, 10, TimeUnit.SECONDS);
    // ensure that the default namespace was created, and that the system user has privileges to access it
    Tasks.waitFor(true, new Callable<Boolean>() {

        public Boolean call() throws Exception {
            // Any exception from the existence check is treated as "not yet" so the wait keeps polling.
            try {
                return namespaceQueryAdmin.exists(NamespaceId.DEFAULT);
            } catch (Exception e) {
                return false;
    }, 10, TimeUnit.SECONDS);
    // ensure that the system artifact was deployed, and that the system user has privileges to access it
    // this will throw an ArtifactNotFoundException if the artifact was not deployed, and UnauthorizedException if
    // the user does not have required privileges
    Tasks.waitFor(true, new Callable<Boolean>() {

        public Boolean call() throws Exception {
            // NOTE(review): the artifact lookup described above is missing from this excerpt; as
            // shown, the try block only returns true.
            try {
                return true;
            } catch (Exception e) {
                return false;
    }, 20, TimeUnit.SECONDS);
    // ensure that system datasets can be created by the system user
    // NOTE(review): systemDataset is not used again in the visible code.
    Dataset systemDataset = DatasetsUtil.getOrCreateDataset(dsFramework, NamespaceId.SYSTEM.dataset("system-dataset"), Table.class.getName(), DatasetProperties.EMPTY, Collections.<String, String>emptyMap());
    // as part of bootstrapping, admin users were also granted admin privileges on the CDAP instance, so they can
    // create namespaces
    namespaceAdmin.create(new NamespaceMeta.Builder().setName("success").build());
    try {
        // NOTE(review): the next line is garbled - the trailing string literal looks like the
        // argument of a stripped failure assertion, and no switch to a non-admin user is visible.
        namespaceAdmin.create(new NamespaceMeta.Builder().setName("failure").build());"Bob should not have been able to create a namespace since he is not an admin user");
    } catch (UnauthorizedException expected) {
    // expected
Also used : Table(co.cask.cdap.api.dataset.table.Table) Dataset(co.cask.cdap.api.dataset.Dataset) UnauthorizedException( IOException( Predicate(co.cask.cdap.api.Predicate) EntityId( NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) UnauthorizedException( Principal( Test(org.junit.Test)

Example 4 with Dataset

use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.

the class DatasetSerDe method getDatasetSchema.

// Determines the schema of the given dataset and stores it in the `schema` field: first from
// the "schema" dataset property, otherwise by instantiating the dataset and generating a
// schema from its record type. Failures are reported as SerDeException.
// NOTE(review): this excerpt is truncated - closing braces and probably early returns are
// missing, so the comments below describe only the visible statements.
private void getDatasetSchema(Configuration conf, DatasetId datasetId) throws SerDeException {
    try (ContextManager.Context hiveContext = ContextManager.getContext(conf)) {
        // Because it calls initialize just to get the object inspector
        if (hiveContext == null) {
            // NOTE(review): the next line is a dangling string literal; the call it belonged to
            // (presumably a log statement) and any early return are missing from the excerpt.
  "Hive provided a null conf, will not be able to get dataset schema.");
        // some datasets like Table and ObjectMappedTable have schema in the dataset properties
        try {
            DatasetSpecification datasetSpec = hiveContext.getDatasetSpec(datasetId);
            String schemaStr = datasetSpec.getProperty("schema");
            // When the property is present, it is parsed as JSON into the schema field.
            if (schemaStr != null) {
                schema = Schema.parseJson(schemaStr);
        } catch (DatasetManagementException | ServiceUnavailableException e) {
            throw new SerDeException("Could not instantiate dataset " + datasetId, e);
        } catch (IOException e) {
            throw new SerDeException("Exception getting schema for dataset " + datasetId, e);
        // other datasets must be instantiated to get their schema
        // conf is null if this is a query that writes to a dataset
        ClassLoader parentClassLoader = conf == null ? null : conf.getClassLoader();
        try (SystemDatasetInstantiator datasetInstantiator = hiveContext.createDatasetInstantiator(parentClassLoader)) {
            Dataset dataset = datasetInstantiator.getDataset(datasetId);
            if (dataset == null) {
                throw new SerDeException("Could not find dataset " + datasetId);
            // The record type comes from whichever of RecordScannable / RecordWritable the dataset
            // implements, with RecordScannable checked first.
            Type recordType;
            if (dataset instanceof RecordScannable) {
                recordType = ((RecordScannable) dataset).getRecordType();
            } else if (dataset instanceof RecordWritable) {
                recordType = ((RecordWritable) dataset).getRecordType();
            } else {
                throw new SerDeException("Dataset " + datasetId + " is not explorable.");
            schema = schemaGenerator.generate(recordType);
        } catch (UnsupportedTypeException e) {
            throw new SerDeException("Dataset " + datasetId + " has an unsupported schema.", e);
        } catch (IOException e) {
            throw new SerDeException("Exception while trying to instantiate dataset " + datasetId, e);
    // This catch belongs to the outer try-with-resources over the Hive context.
    } catch (IOException e) {
        throw new SerDeException("Could not get hive context from configuration.", e);
Also used : RecordWritable( Dataset(co.cask.cdap.api.dataset.Dataset) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification) ServiceUnavailableException(co.cask.cdap.common.ServiceUnavailableException) IOException( RecordScannable( DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) Type(java.lang.reflect.Type) SystemDatasetInstantiator( ContextManager(co.cask.cdap.hive.context.ContextManager) UnsupportedTypeException( SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 5 with Dataset

use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.

the class ExploreTableManager method updateDataset.

  /**
   * Update ad-hoc exploration on the given dataset by altering the corresponding Hive table. If exploration has
   * not been enabled on the dataset, this will fail. Assumes the dataset actually exists.
   *
   * @param datasetId the ID of the dataset to update
   * @param spec the new specification for the dataset
   * @param oldSpec the specification the dataset had before the update
   * @return query handle for altering the Hive table for the dataset
   * @throws IllegalArgumentException if some required dataset property like schema is not set
   * @throws UnsupportedTypeException if the schema of the dataset is not compatible with Hive
   * @throws ExploreException if there was an exception submitting the alter table statements
   * @throws SQLException if there was a problem with the alter table statements
   * @throws DatasetNotFoundException if the dataset had to be instantiated, but could not be found
   * @throws ClassNotFoundException if there was a missing class when instantiating the dataset
   */
// Builds the Hive statements needed to move a dataset's explore table from oldSpec to spec
// and submits them through exploreService.
// NOTE(review): this excerpt is truncated - closing braces and the bodies of several
// if/finally blocks are missing, so the comments below describe only the visible statements.
public QueryHandle updateDataset(DatasetId datasetId, DatasetSpecification spec, DatasetSpecification oldSpec) throws IllegalArgumentException, ExploreException, SQLException, UnsupportedTypeException, DatasetNotFoundException, ClassNotFoundException {
    // Table and database names are derived from the new and the old properties respectively.
    String tableName = tableNaming.getTableName(datasetId, spec.getProperties());
    String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
    String oldTableName = tableNaming.getTableName(datasetId, oldSpec.getProperties());
    String oldDatabaseName = ExploreProperties.getExploreDatabaseName(oldSpec.getProperties());
    try {
        // Probe for the old table; the result is discarded, only TableNotFoundException matters.
        exploreService.getTableInfo(datasetId.getNamespace(), oldDatabaseName, oldTableName);
    } catch (TableNotFoundException e) {
        // but the new spec may be explorable, so attempt to enable it
        return enableDataset(datasetId, spec, false);
    List<String> alterStatements;
    // The database name is compared with Objects.equals - presumably because it may be null; confirm.
    if (!(oldTableName.equals(tableName) && Objects.equals(oldDatabaseName, databaseName))) {
        alterStatements = new ArrayList<>();
        // database/table name changed. All we can do is disable the old table and enable the new one
        String disableStatement = generateDisableStatement(datasetId, oldSpec);
        // NOTE(review): the bodies of the two null checks below are missing from the excerpt;
        // presumably each non-null statement is added to alterStatements.
        if (disableStatement != null) {
        String enableStatement = generateEnableStatement(datasetId, spec, false);
        if (enableStatement != null) {
    } else {
        // Names unchanged: instantiate the dataset and delegate to generateAlterStatements.
        Dataset dataset = null;
        try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
            dataset = datasetInstantiator.getDataset(datasetId);
            alterStatements = generateAlterStatements(datasetId, tableName, dataset, spec, oldSpec);
        } catch (IOException e) {
            LOG.error("Exception instantiating dataset {}.", datasetId, e);
            // NOTE(review): the IOException is logged but not passed as the cause of the ExploreException.
            throw new ExploreException("Exception while trying to instantiate dataset " + datasetId);
        // NOTE(review): the body of this finally block is not present in the excerpt.
        } finally {
    LOG.trace("alter statements for update: {}", alterStatements);
    // Nothing to run: return the no-op handle.
    if (alterStatements == null || alterStatements.isEmpty()) {
        return QueryHandle.NO_OP;
    // A single statement is passed on its own; otherwise all statements are passed as an array.
    if (alterStatements.size() == 1) {
        return exploreService.execute(datasetId.getParent(), alterStatements.get(0));
    return exploreService.execute(datasetId.getParent(), alterStatements.toArray(new String[alterStatements.size()]));
Also used : Dataset(co.cask.cdap.api.dataset.Dataset) SystemDatasetInstantiator( IOException(


Dataset (co.cask.cdap.api.dataset.Dataset)18 IOException ( DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException)7 SystemDatasetInstantiator ( DatasetInstantiationException ( UnsupportedTypeException ( PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)3 BadRequestException (co.cask.cdap.common.BadRequestException)3 DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification)2 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)2 Partitioning (co.cask.cdap.api.dataset.lib.Partitioning)2 TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException)2 ServiceUnavailableException (co.cask.cdap.common.ServiceUnavailableException)2 CustomDatasetApp (co.cask.cdap.data2.dataset2.customds.CustomDatasetApp)2 CustomOperations (co.cask.cdap.data2.dataset2.customds.CustomOperations)2 DefaultTopLevelExtendsDataset (co.cask.cdap.data2.dataset2.customds.DefaultTopLevelExtendsDataset)2 DelegatingDataset (co.cask.cdap.data2.dataset2.customds.DelegatingDataset)2 TopLevelDataset (co.cask.cdap.data2.dataset2.customds.TopLevelDataset)2 TopLevelDirectDataset (co.cask.cdap.data2.dataset2.customds.TopLevelDirectDataset)2 TopLevelExtendsDataset (co.cask.cdap.data2.dataset2.customds.TopLevelExtendsDataset)2