Search in sources:

Example 1 with DatasetManagementException

use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.

the class UsageRegistryTest method testUsageRegistry.

/**
 * Exercises a {@link DefaultUsageRegistry} through a register / re-register / unregister /
 * register-again cycle and checks the dataset, stream and program lookups after every step.
 *
 * <p>The registry is built on a dataset framework whose {@code getDataset} wraps every
 * {@code UsageDataset} in a {@code WrappedUsageDataset}; the test reads the static
 * {@code WrappedUsageDataset.registerCount} to check how many registrations took place.
 * NOTE(review): presumably the wrapper increments that counter once per registered owner -
 * confirm against WrappedUsageDataset.
 */
@Test
public void testUsageRegistry() {
    // instantiate a usage registry
    UsageRegistry registry = new DefaultUsageRegistry(new TransactionExecutorFactory() {

        @Override
        public TransactionExecutor createExecutor(Iterable<TransactionAware> iterable) {
            return dsFrameworkUtil.newInMemoryTransactionExecutor(iterable);
        }
    }, new ForwardingDatasetFramework(dsFrameworkUtil.getFramework()) {

        // Delegates to the real framework, but hands out UsageDataset instances wrapped in a
        // WrappedUsageDataset; any other dataset (or null) is returned unchanged.
        @Nullable
        @Override
        public <T extends Dataset> T getDataset(DatasetId datasetInstanceId, Map<String, String> arguments, @Nullable ClassLoader classLoader) throws DatasetManagementException, IOException {
            T t = super.getDataset(datasetInstanceId, arguments, classLoader);
            if (t instanceof UsageDataset) {
                // the unchecked cast is guarded by the instanceof check above
                @SuppressWarnings("unchecked") T t1 = (T) new WrappedUsageDataset((UsageDataset) t);
                return t1;
            }
            return t;
        }
    });
    // register usage for a stream and a dataset for single and multiple "owners", including a non-program
    registry.register(flow11, datasetInstance1);
    registry.register(flow12, stream1);
    registry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
    registry.registerAll(ImmutableList.of(flow21, flow22), stream1);
    // remember the counter after the initial registrations; later checks are relative to it
    int count = WrappedUsageDataset.registerCount;
    // validate usage
    Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
    // register datasets again
    registry.register(flow11, datasetInstance1);
    registry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
    // validate that this does re-register previous usages (DefaultUsageRegistry no longer avoids re-registration)
    // expected increment of 3: one register call plus a registerAll over two programs
    count += 3;
    Assert.assertEquals(count, WrappedUsageDataset.registerCount);
    // validate usage
    Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
    // unregister app
    registry.unregister(flow11.getParent());
    // validate usage for that app is gone
    // NOTE(review): the flow12 assertion below implies flow12 belongs to the same application as flow11 - confirm in the fixture
    Assert.assertEquals(ImmutableSet.of(), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(stream1));
    // register application 1 again
    registry.register(flow11, datasetInstance1);
    registry.register(flow12, stream1);
    // validate it was re-registered
    Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
    // validate that this actually re-registered previous usages (through code in wrapped usage dataset)
    // expected increment of 2: the two single register calls made after the unregister
    Assert.assertEquals(count + 2, WrappedUsageDataset.registerCount);
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) IOException(java.io.IOException) TransactionExecutorFactory(org.apache.tephra.TransactionExecutorFactory) DatasetId(co.cask.cdap.proto.id.DatasetId) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) TransactionAware(org.apache.tephra.TransactionAware) ForwardingDatasetFramework(co.cask.cdap.data2.dataset2.ForwardingDatasetFramework) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 2 with DatasetManagementException

use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.

the class DatasetSerDe method getDatasetSchema.

/**
 * Determines the schema of the given dataset and stores it in the {@code schema} field.
 *
 * <p>The schema is taken from the dataset specification's {@code "schema"} property when that
 * property is present; otherwise the dataset is instantiated and the schema is generated from
 * its record type. If no Hive context is available the method logs and returns without
 * touching {@code schema}.
 *
 * @param conf the Hive configuration; may be null, in which case no parent class loader is used
 * @param datasetId the dataset whose schema is wanted
 * @throws SerDeException if the context, specification or dataset cannot be obtained, the
 *     dataset does not exist, it is neither record-scannable nor record-writable, or its
 *     record type is unsupported
 */
private void getDatasetSchema(Configuration conf, DatasetId datasetId) throws SerDeException {
    try (ContextManager.Context context = ContextManager.getContext(conf)) {
        if (context == null) {
            // NOTE(review): upstream commentary hints that Hive may call initialize merely to
            // obtain the object inspector, leaving no usable context - confirm.
            LOG.info("Hive provided a null conf, will not be able to get dataset schema.");
            return;
        }

        // First choice: datasets such as Table and ObjectMappedTable carry their schema as a
        // dataset property, so no instantiation is needed.
        try {
            String schemaJson = context.getDatasetSpec(datasetId).getProperty("schema");
            if (schemaJson != null) {
                schema = Schema.parseJson(schemaJson);
                return;
            }
        } catch (DatasetManagementException | ServiceUnavailableException e) {
            throw new SerDeException("Could not instantiate dataset " + datasetId, e);
        } catch (IOException e) {
            throw new SerDeException("Exception getting schema for dataset " + datasetId, e);
        }

        // Fallback: instantiate the dataset and derive the schema from its record type.
        // conf is null if this is a query that writes to a dataset.
        ClassLoader parentLoader = null;
        if (conf != null) {
            parentLoader = conf.getClassLoader();
        }
        try (SystemDatasetInstantiator instantiator = context.createDatasetInstantiator(parentLoader)) {
            Dataset instance = instantiator.getDataset(datasetId);
            if (instance == null) {
                throw new SerDeException("Could not find dataset " + datasetId);
            }
            boolean scannable = instance instanceof RecordScannable;
            if (!scannable && !(instance instanceof RecordWritable)) {
                throw new SerDeException("Dataset " + datasetId + " is not explorable.");
            }
            // RecordScannable takes precedence when a dataset implements both interfaces.
            Type recordType = scannable
                ? ((RecordScannable) instance).getRecordType()
                : ((RecordWritable) instance).getRecordType();
            schema = schemaGenerator.generate(recordType);
        } catch (UnsupportedTypeException e) {
            throw new SerDeException("Dataset " + datasetId + " has an unsupported schema.", e);
        } catch (IOException e) {
            throw new SerDeException("Exception while trying to instantiate dataset " + datasetId, e);
        }
    } catch (IOException e) {
        throw new SerDeException("Could not get hive context from configuration.", e);
    }
}
Also used : RecordWritable(co.cask.cdap.api.data.batch.RecordWritable) Dataset(co.cask.cdap.api.dataset.Dataset) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification) ServiceUnavailableException(co.cask.cdap.common.ServiceUnavailableException) IOException(java.io.IOException) RecordScannable(co.cask.cdap.api.data.batch.RecordScannable) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) Type(java.lang.reflect.Type) SystemDatasetInstantiator(co.cask.cdap.data.dataset.SystemDatasetInstantiator) ContextManager(co.cask.cdap.hive.context.ContextManager) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 3 with DatasetManagementException

use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.

the class WorkflowDriver method createLocalDatasets.

/**
 * Creates every workflow-local dataset listed in the dataset framework's name mapping.
 *
 * <p>For each mapping entry, the key selects the creation spec from the workflow specification
 * and the value is the instance name to create in the workflow run's namespace. Each
 * {@code addInstance} call goes through {@code Retries.callWithRetries} with a fixed-delay
 * retry strategy.
 *
 * @throws IOException if the retried call surfaces an {@code IOException}
 * @throws DatasetManagementException if the retried call surfaces a dataset management failure
 * @throws IllegalStateException if the retried call surfaces any other exception
 */
private void createLocalDatasets() throws IOException, DatasetManagementException {
    String principalName = programOptions.getArguments().getOption(ProgramOptionConstants.PRINCIPAL);
    // The owning principal is optional; the instance is added with a null principal when unset.
    final KerberosPrincipalId principalId;
    if (principalName == null) {
        principalId = null;
    } else {
        principalId = new KerberosPrincipalId(principalName);
    }

    for (Map.Entry<String, String> nameMapping : datasetFramework.getDatasetNameMapping().entrySet()) {
        final String localInstanceName = nameMapping.getValue();
        final DatasetId instanceId = new DatasetId(workflowRunId.getNamespace(), localInstanceName);
        final DatasetCreationSpec instanceSpec = workflowSpec.getLocalDatasetSpecs().get(nameMapping.getKey());
        LOG.debug("Adding Workflow local dataset instance: {}", localInstanceName);

        // The creation itself, expressed as a retryable unit of work.
        Retries.Callable<Void, Exception> addInstanceTask = new Retries.Callable<Void, Exception>() {

            @Override
            public Void call() throws Exception {
                datasetFramework.addInstance(instanceSpec.getTypeName(), instanceId,
                                             addLocalDatasetProperty(instanceSpec.getProperties()), principalId);
                return null;
            }
        };
        try {
            Retries.callWithRetries(
                addInstanceTask,
                RetryStrategies.fixDelay(Constants.Retry.LOCAL_DATASET_OPERATION_RETRY_DELAY_SECONDS, TimeUnit.SECONDS));
        } catch (IOException | DatasetManagementException e) {
            // declared failures pass through untouched
            throw e;
        } catch (Exception e) {
            // this should never happen
            throw new IllegalStateException(e);
        }
    }
}
Also used : IOException(java.io.IOException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) DatasetId(co.cask.cdap.proto.id.DatasetId) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) Retries(co.cask.cdap.common.service.Retries) DatasetCreationSpec(co.cask.cdap.internal.dataset.DatasetCreationSpec) KerberosPrincipalId(co.cask.cdap.proto.id.KerberosPrincipalId) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 4 with DatasetManagementException

use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.

the class InMemoryDatasetFramework method deleteInstance.

/**
 * Removes a dataset instance from the in-memory registry, drops it through its admin, and
 * publishes a DELETE audit event. The whole operation runs under the write lock.
 *
 * <p>The instance is removed from the registry before its definition is resolved, so it is
 * already unregistered if a later step fails.
 *
 * @param instanceId the dataset instance to delete
 * @throws InstanceNotFoundException if no such instance is registered
 * @throws DatasetManagementException if the instance's dataset type cannot be resolved
 * @throws IOException declared for failures of the underlying admin operation
 */
@Override
public void deleteInstance(DatasetId instanceId) throws DatasetManagementException, IOException {
    writeLock.lock();
    try {
        DatasetSpecification removedSpec = instances.remove(instanceId.getParent(), instanceId);
        if (removedSpec == null) {
            throw new InstanceNotFoundException(instanceId.getEntityName());
        }

        DatasetDefinition definition = getDefinitionForType(instanceId.getParent(), removedSpec.getType());
        if (definition == null) {
            String message = String.format(
                "Dataset type '%s' is neither registered in the '%s' namespace nor in the system namespace",
                removedSpec.getType(), instanceId.getParent());
            throw new DatasetManagementException(message);
        }

        DatasetContext context = DatasetContext.from(instanceId.getNamespace());
        definition.getAdmin(context, removedSpec, null).drop();
        publishAudit(instanceId, AuditType.DELETE);
    } finally {
        writeLock.unlock();
    }
}
Also used : DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) InstanceNotFoundException(co.cask.cdap.api.dataset.InstanceNotFoundException) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification) AbstractDatasetDefinition(co.cask.cdap.api.dataset.lib.AbstractDatasetDefinition) DatasetDefinition(co.cask.cdap.api.dataset.DatasetDefinition)

Example 5 with DatasetManagementException

use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.

the class InMemoryDatasetFramework method addInstance.

/**
 * Creates a dataset instance of the given type and records it in the in-memory registry,
 * all under the write lock.
 *
 * <p>The specification is produced by the type's definition, then augmented with the original
 * properties and, if present, the description. The instance is registered only after the
 * admin's {@code create()} has returned; a CREATE audit event is published afterwards.
 *
 * @param datasetType name of the dataset type to instantiate
 * @param datasetInstanceId id of the instance to create
 * @param props properties used to configure the instance
 * @throws InstanceConflictException if an instance with this id is already registered
 * @throws DatasetManagementException if the dataset type cannot be resolved
 * @throws IOException declared for failures of the underlying admin operation
 */
@Override
public void addInstance(String datasetType, DatasetId datasetInstanceId, DatasetProperties props) throws DatasetManagementException, IOException {
    writeLock.lock();
    try {
        boolean alreadyExists = instances.contains(datasetInstanceId.getParent(), datasetInstanceId);
        if (alreadyExists) {
            throw new InstanceConflictException(String.format("Dataset instance '%s' already exists.", datasetInstanceId));
        }

        DatasetDefinition definition = getDefinitionForType(datasetInstanceId.getParent(), datasetType);
        if (definition == null) {
            String message = String.format(
                "Dataset type '%s' is neither registered in the '%s' namespace nor in the system namespace",
                datasetType, datasetInstanceId.getParent());
            throw new DatasetManagementException(message);
        }

        // Build the specification, then attach the original properties and optional description.
        DatasetSpecification instanceSpec =
            definition.configure(datasetInstanceId.getEntityName(), props).setOriginalProperties(props);
        String description = props.getDescription();
        if (description != null) {
            instanceSpec = instanceSpec.setDescription(description);
        }

        DatasetContext context = DatasetContext.from(datasetInstanceId.getNamespace());
        definition.getAdmin(context, instanceSpec, null).create();
        instances.put(datasetInstanceId.getParent(), datasetInstanceId, instanceSpec);
        publishAudit(datasetInstanceId, AuditType.CREATE);
        LOG.info("Created dataset {} of type {}", datasetInstanceId, datasetType);
    } finally {
        writeLock.unlock();
    }
}
Also used : DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) InstanceConflictException(co.cask.cdap.api.dataset.InstanceConflictException) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification) AbstractDatasetDefinition(co.cask.cdap.api.dataset.lib.AbstractDatasetDefinition) DatasetDefinition(co.cask.cdap.api.dataset.DatasetDefinition)

Aggregations

DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 28, IOException (java.io.IOException): 14, DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification): 8, DatasetId (co.cask.cdap.proto.id.DatasetId): 6, DatasetDefinition (co.cask.cdap.api.dataset.DatasetDefinition): 5, AbstractDatasetDefinition (co.cask.cdap.api.dataset.lib.AbstractDatasetDefinition): 5, Dataset (co.cask.cdap.api.dataset.Dataset): 4, InstanceConflictException (co.cask.cdap.api.dataset.InstanceConflictException): 4, ServiceUnavailableException (co.cask.cdap.common.ServiceUnavailableException): 4, DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties): 3, InstanceNotFoundException (co.cask.cdap.api.dataset.InstanceNotFoundException): 3, Table (co.cask.cdap.api.dataset.table.Table): 3, Nullable (javax.annotation.Nullable): 3, TransactionFailureException (org.apache.tephra.TransactionFailureException): 3, Test (org.junit.Test): 3, FileSet (co.cask.cdap.api.dataset.lib.FileSet): 2, SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator): 2, Principal (co.cask.cdap.proto.security.Principal): 2, AuthorizationEnforcer (co.cask.cdap.security.spi.authorization.AuthorizationEnforcer): 2, HttpResponse (co.cask.common.http.HttpResponse): 2