use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.
the class UsageRegistryTest method testUsageRegistry.
/**
 * Walks a {@link DefaultUsageRegistry} through registration, repeated registration, un-registration of an
 * application and registration after un-registration, checking the recorded usage after every step.
 * The usage dataset handed to the registry is wrapped in a {@link WrappedUsageDataset} so that the number
 * of register calls reaching the dataset can be checked through its static counter.
 */
@Test
public void testUsageRegistry() {
  // hands out in-memory transaction executors
  TransactionExecutorFactory txExecutorFactory = new TransactionExecutorFactory() {
    @Override
    public TransactionExecutor createExecutor(Iterable<TransactionAware> txAwares) {
      return dsFrameworkUtil.newInMemoryTransactionExecutor(txAwares);
    }
  };
  // delegates to the real framework but wraps every UsageDataset it returns
  ForwardingDatasetFramework wrappingFramework = new ForwardingDatasetFramework(dsFrameworkUtil.getFramework()) {
    @Nullable
    @Override
    public <T extends Dataset> T getDataset(DatasetId datasetInstanceId, Map<String, String> arguments,
                                            @Nullable ClassLoader classLoader)
      throws DatasetManagementException, IOException {
      T dataset = super.getDataset(datasetInstanceId, arguments, classLoader);
      if (!(dataset instanceof UsageDataset)) {
        return dataset;
      }
      @SuppressWarnings("unchecked")
      T wrapped = (T) new WrappedUsageDataset((UsageDataset) dataset);
      return wrapped;
    }
  };
  UsageRegistry usageRegistry = new DefaultUsageRegistry(txExecutorFactory, wrappingFramework);

  // step 1: record usage of a dataset and a stream, for single owners and for lists of owners
  usageRegistry.register(flow11, datasetInstance1);
  usageRegistry.register(flow12, stream1);
  usageRegistry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
  usageRegistry.registerAll(ImmutableList.of(flow21, flow22), stream1);
  final int baselineRegisterCount = WrappedUsageDataset.registerCount;
  assertUsage(usageRegistry,
              ImmutableSet.of(datasetInstance1), ImmutableSet.of(stream1),
              ImmutableSet.of(flow11), ImmutableSet.of(flow12, flow21, flow22));

  // step 2: record the dataset usages a second time
  usageRegistry.register(flow11, datasetInstance1);
  usageRegistry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
  // the repeated registrations are expected to reach the dataset again: three more register calls
  Assert.assertEquals(baselineRegisterCount + 3, WrappedUsageDataset.registerCount);
  assertUsage(usageRegistry,
              ImmutableSet.of(datasetInstance1), ImmutableSet.of(stream1),
              ImmutableSet.of(flow11), ImmutableSet.of(flow12, flow21, flow22));

  // step 3: un-register the parent application of flow11; usage of flow11 and flow12 must disappear
  usageRegistry.unregister(flow11.getParent());
  assertUsage(usageRegistry,
              ImmutableSet.of(), ImmutableSet.of(),
              ImmutableSet.of(), ImmutableSet.of(flow21, flow22));

  // step 4: record the usage of flow11 and flow12 once more and check that it is back
  usageRegistry.register(flow11, datasetInstance1);
  usageRegistry.register(flow12, stream1);
  assertUsage(usageRegistry,
              ImmutableSet.of(datasetInstance1), ImmutableSet.of(stream1),
              ImmutableSet.of(flow11), ImmutableSet.of(flow12, flow21, flow22));
  // both registrations must have gone through the wrapped usage dataset
  Assert.assertEquals(baselineRegisterCount + 3 + 2, WrappedUsageDataset.registerCount);
}

/**
 * Asserts the complete usage picture held by the registry. The usage of flow21 and flow22 and the programs
 * of datasetInstance2 are the same at every point of the test and are therefore fixed here; the remaining
 * expectations are passed in.
 *
 * @param registry the registry under test
 * @param flow11Datasets expected result of {@code getDatasets(flow11)}
 * @param flow12Streams expected result of {@code getStreams(flow12)}
 * @param dataset1Programs expected result of {@code getPrograms(datasetInstance1)}
 * @param stream1Programs expected result of {@code getPrograms(stream1)}
 */
private void assertUsage(UsageRegistry registry, ImmutableSet<?> flow11Datasets, ImmutableSet<?> flow12Streams,
                         ImmutableSet<?> dataset1Programs, ImmutableSet<?> stream1Programs) {
  Assert.assertEquals(flow11Datasets, registry.getDatasets(flow11));
  Assert.assertEquals(flow12Streams, registry.getStreams(flow12));
  Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
  Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
  Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
  Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
  Assert.assertEquals(dataset1Programs, registry.getPrograms(datasetInstance1));
  Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
  Assert.assertEquals(stream1Programs, registry.getPrograms(stream1));
}
use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.
the class DatasetSerDe method getDatasetSchema.
/**
 * Determines the schema of the given dataset and stores it in the {@code schema} field.
 *
 * <p>The schema is parsed from the {@code "schema"} property of the dataset specification when that
 * property is present. Otherwise the dataset is instantiated and the schema is generated from its record
 * type. When no Hive context is available the method logs a message and returns without setting a schema.
 *
 * @param conf the Hive configuration; may be {@code null}, in which case no parent class loader is passed
 *             to the dataset instantiator
 * @param datasetId the dataset whose schema is needed
 * @throws SerDeException if the Hive context cannot be obtained, the specification lookup or the dataset
 *                        instantiation fails, the dataset cannot be found, the dataset is neither
 *                        {@code RecordScannable} nor {@code RecordWritable}, or its record type is not
 *                        supported by the schema generator
 */
private void getDatasetSchema(Configuration conf, DatasetId datasetId) throws SerDeException {
  try (ContextManager.Context hiveContext = ContextManager.getContext(conf)) {
    // The context may be absent (reportedly when initialize is called only to obtain the object
    // inspector); nothing can be looked up without it.
    if (hiveContext == null) {
      LOG.info("Hive provided a null conf, will not be able to get dataset schema.");
      return;
    }
    // some datasets like Table and ObjectMappedTable have schema in the dataset properties
    try {
      DatasetSpecification datasetSpec = hiveContext.getDatasetSpec(datasetId);
      // NOTE(review): the spec is presumably null when the dataset does not exist - confirm against
      // getDatasetSpec. Guarding here avoids a NullPointerException and lets the instantiation path
      // below report the missing dataset as a SerDeException.
      String schemaStr = datasetSpec == null ? null : datasetSpec.getProperty("schema");
      if (schemaStr != null) {
        schema = Schema.parseJson(schemaStr);
        return;
      }
    } catch (DatasetManagementException | ServiceUnavailableException e) {
      throw new SerDeException("Could not instantiate dataset " + datasetId, e);
    } catch (IOException e) {
      throw new SerDeException("Exception getting schema for dataset " + datasetId, e);
    }
    // other datasets must be instantiated to get their schema
    // conf is null if this is a query that writes to a dataset
    ClassLoader parentClassLoader = conf == null ? null : conf.getClassLoader();
    try (SystemDatasetInstantiator datasetInstantiator = hiveContext.createDatasetInstantiator(parentClassLoader)) {
      Dataset dataset = datasetInstantiator.getDataset(datasetId);
      if (dataset == null) {
        throw new SerDeException("Could not find dataset " + datasetId);
      }
      // the record type comes from whichever of the two record interfaces the dataset implements
      Type recordType;
      if (dataset instanceof RecordScannable) {
        recordType = ((RecordScannable) dataset).getRecordType();
      } else if (dataset instanceof RecordWritable) {
        recordType = ((RecordWritable) dataset).getRecordType();
      } else {
        throw new SerDeException("Dataset " + datasetId + " is not explorable.");
      }
      schema = schemaGenerator.generate(recordType);
    } catch (UnsupportedTypeException e) {
      throw new SerDeException("Dataset " + datasetId + " has an unsupported schema.", e);
    } catch (IOException e) {
      throw new SerDeException("Exception while trying to instantiate dataset " + datasetId, e);
    }
  } catch (IOException e) {
    throw new SerDeException("Could not get hive context from configuration.", e);
  }
}
use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.
the class WorkflowDriver method createLocalDatasets.
/**
 * Adds one dataset instance for every entry of the dataset framework's name mapping, using the creation
 * spec that the workflow specification holds under the entry's key and the instance name given by the
 * entry's value. Each addition is retried with a fixed delay.
 *
 * @throws IOException if adding an instance fails with an {@link IOException}
 * @throws DatasetManagementException if adding an instance fails with a {@link DatasetManagementException}
 * @throws IllegalStateException if adding an instance fails with any other exception
 */
private void createLocalDatasets() throws IOException, DatasetManagementException {
  String principalName = programOptions.getArguments().getOption(ProgramOptionConstants.PRINCIPAL);
  // no principal option means the instances are added without an owner principal
  final KerberosPrincipalId ownerPrincipal;
  if (principalName == null) {
    ownerPrincipal = null;
  } else {
    ownerPrincipal = new KerberosPrincipalId(principalName);
  }

  for (final Map.Entry<String, String> mapping : datasetFramework.getDatasetNameMapping().entrySet()) {
    final String localName = mapping.getValue();
    final DatasetId localDatasetId = new DatasetId(workflowRunId.getNamespace(), localName);
    final DatasetCreationSpec creationSpec = workflowSpec.getLocalDatasetSpecs().get(mapping.getKey());
    LOG.debug("Adding Workflow local dataset instance: {}", localName);

    // type name and properties are read inside call() so that they are evaluated on every attempt
    Retries.Callable<Void, Exception> addInstanceCall = new Retries.Callable<Void, Exception>() {
      @Override
      public Void call() throws Exception {
        datasetFramework.addInstance(creationSpec.getTypeName(), localDatasetId,
                                     addLocalDatasetProperty(creationSpec.getProperties()), ownerPrincipal);
        return null;
      }
    };

    try {
      Retries.callWithRetries(
        addInstanceCall,
        RetryStrategies.fixDelay(Constants.Retry.LOCAL_DATASET_OPERATION_RETRY_DELAY_SECONDS, TimeUnit.SECONDS));
    } catch (IOException | DatasetManagementException e) {
      // declared failures are passed on unchanged
      throw e;
    } catch (Exception e) {
      // this should never happen
      throw new IllegalStateException(e);
    }
  }
}
use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.
the class InMemoryDatasetFramework method deleteInstance.
/**
 * Deletes a dataset instance: removes it from the {@code instances} registry, drops it through the admin
 * of its dataset type and publishes a {@code DELETE} audit event. The whole operation runs under the
 * write lock.
 *
 * <p>The instance is only removed from the registry once its type definition has been resolved, so a
 * call that fails because the type is unknown leaves the instance registered.
 *
 * @param instanceId the instance to delete
 * @throws DatasetManagementException if the dataset type of the instance cannot be resolved; an
 *                                    {@code InstanceNotFoundException} is thrown if the instance is not
 *                                    registered
 * @throws IOException declared for the drop operation of the dataset admin
 */
@Override
public void deleteInstance(DatasetId instanceId) throws DatasetManagementException, IOException {
  writeLock.lock();
  try {
    // look up without removing: all validation happens before the registry is modified
    DatasetSpecification spec = instances.get(instanceId.getParent(), instanceId);
    if (spec == null) {
      throw new InstanceNotFoundException(instanceId.getEntityName());
    }
    DatasetDefinition def = getDefinitionForType(instanceId.getParent(), spec.getType());
    if (def == null) {
      throw new DatasetManagementException(String.format("Dataset type '%s' is neither registered in the '%s' namespace nor in the system namespace", spec.getType(), instanceId.getParent()));
    }
    // as before, the registry entry is removed ahead of the drop
    instances.remove(instanceId.getParent(), instanceId);
    def.getAdmin(DatasetContext.from(instanceId.getNamespace()), spec, null).drop();
    publishAudit(instanceId, AuditType.DELETE);
  } finally {
    writeLock.unlock();
  }
}
use of co.cask.cdap.api.dataset.DatasetManagementException in project cdap by caskdata.
the class InMemoryDatasetFramework method addInstance.
/**
 * Creates a dataset instance of the given type: configures a specification from the properties, creates
 * the dataset through the admin of its type, records the specification in the {@code instances} registry
 * and publishes a {@code CREATE} audit event. The whole operation runs under the write lock.
 *
 * @param datasetType name of the dataset type to instantiate
 * @param datasetInstanceId id of the new instance
 * @param props properties used to configure the instance; a non-null description is copied into the
 *              specification
 * @throws DatasetManagementException if the dataset type cannot be resolved; an
 *                                    {@code InstanceConflictException} is thrown if the instance is
 *                                    already registered
 * @throws IOException declared for the create operation of the dataset admin
 */
@Override
public void addInstance(String datasetType, DatasetId datasetInstanceId, DatasetProperties props) throws DatasetManagementException, IOException {
  writeLock.lock();
  try {
    // refuse to overwrite an instance that is already registered
    boolean alreadyRegistered = instances.contains(datasetInstanceId.getParent(), datasetInstanceId);
    if (alreadyRegistered) {
      String conflictMessage = String.format("Dataset instance '%s' already exists.", datasetInstanceId);
      throw new InstanceConflictException(conflictMessage);
    }

    DatasetDefinition definition = getDefinitionForType(datasetInstanceId.getParent(), datasetType);
    if (definition == null) {
      String unknownTypeMessage = String.format(
        "Dataset type '%s' is neither registered in the '%s' namespace nor in the system namespace",
        datasetType, datasetInstanceId.getParent());
      throw new DatasetManagementException(unknownTypeMessage);
    }

    // build the specification: configured by the type, then enriched with the original properties
    DatasetSpecification instanceSpec =
      definition.configure(datasetInstanceId.getEntityName(), props).setOriginalProperties(props);
    if (props.getDescription() != null) {
      instanceSpec = instanceSpec.setDescription(props.getDescription());
    }

    // the instance is registered only after the admin has created it
    DatasetContext namespaceContext = DatasetContext.from(datasetInstanceId.getNamespace());
    definition.getAdmin(namespaceContext, instanceSpec, null).create();
    instances.put(datasetInstanceId.getParent(), datasetInstanceId, instanceSpec);

    publishAudit(datasetInstanceId, AuditType.CREATE);
    LOG.info("Created dataset {} of type {}", datasetInstanceId, datasetType);
  } finally {
    writeLock.unlock();
  }
}
Aggregations