Use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.
The class BasicMapReduceTaskContext, method getBatchReadable.
/**
 * Returns a {@link BatchReadable} that reads data from the given dataset.
 */
<K, V> BatchReadable<K, V> getBatchReadable(@Nullable String datasetNamespace, String datasetName,
                                            Map<String, String> datasetArgs) {
  Dataset dataset;
  if (datasetNamespace == null) {
    dataset = getDataset(datasetName, datasetArgs, AccessType.READ);
  } else {
    dataset = getDataset(datasetNamespace, datasetName, datasetArgs, AccessType.READ);
  }
  // Must be BatchReadable.
  Preconditions.checkArgument(dataset instanceof BatchReadable, "Dataset '%s' is not a BatchReadable.", datasetName);
  @SuppressWarnings("unchecked")
  final BatchReadable<K, V> delegate = (BatchReadable<K, V>) dataset;
  return new BatchReadable<K, V>() {
    @Override
    public List<Split> getSplits() {
      try {
        try {
          return delegate.getSplits();
        } finally {
          flushOperations();
        }
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }

    @Override
    public SplitReader<K, V> createSplitReader(Split split) {
      return new ForwardingSplitReader<K, V>(delegate.createSplitReader(split)) {
        @Override
        public void close() {
          try {
            try {
              super.close();
            } finally {
              flushOperations();
            }
          } catch (Exception e) {
            throw Throwables.propagate(e);
          }
        }
      };
    }
  };
}
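The returned wrapper is consumed like any other BatchReadable. A minimal usage sketch, not from the CDAP source: it assumes a taskContext reference to this class, a dataset named "events" backed by a Table (which implements BatchReadable<byte[], Row>), a hypothetical process callback, and an enclosing method that declares throws InterruptedException, since initialize and nextKeyValue may throw it.

// Usage sketch (assumptions noted above): iterate all splits of a dataset.
BatchReadable<byte[], Row> readable =
  taskContext.getBatchReadable(null, "events", Collections.<String, String>emptyMap());
for (Split split : readable.getSplits()) {
  SplitReader<byte[], Row> reader = readable.createSplitReader(split);
  try {
    reader.initialize(split);
    while (reader.nextKeyValue()) {
      process(reader.getCurrentKey(), reader.getCurrentValue()); // process(...) is hypothetical
    }
  } finally {
    reader.close(); // the forwarding reader above also flushes pending operations here
  }
}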
Use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.
The class BasicMapReduceTaskContext, method getBatchWritable.
/**
 * Returns a {@link CloseableBatchWritable} that writes data to the given dataset.
 */
<K, V> CloseableBatchWritable<K, V> getBatchWritable(String namespace, String datasetName,
                                                     Map<String, String> datasetArgs) {
  Dataset dataset = getDataset(namespace, datasetName, datasetArgs, AccessType.WRITE);
  // Must be BatchWritable.
  Preconditions.checkArgument(dataset instanceof BatchWritable,
                              "Dataset '%s:%s' is not a BatchWritable.", namespace, datasetName);
  @SuppressWarnings("unchecked")
  final BatchWritable<K, V> delegate = (BatchWritable<K, V>) dataset;
  return new CloseableBatchWritable<K, V>() {
    @Override
    public void write(K k, V v) {
      delegate.write(k, v);
    }

    @Override
    public void close() throws IOException {
      try {
        flushOperations();
      } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, IOException.class);
        throw new IOException(e);
      }
    }
  };
}
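A corresponding write-side sketch, again not from the CDAP source: it assumes a taskContext reference to this class and a dataset named "output" backed by a Table (which implements BatchWritable<byte[], Put>). Since CloseableBatchWritable extends Closeable, try-with-resources applies.

// Usage sketch (assumptions noted above): write one row, then flush on close.
try (CloseableBatchWritable<byte[], Put> writable =
       taskContext.getBatchWritable("default", "output", Collections.<String, String>emptyMap())) {
  writable.write(Bytes.toBytes("rowA"), new Put("rowA").add("count", 1L));
} // close() flushes buffered operations and rethrows failures as IOException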
Use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.
The class AuthorizationBootstrapperTest, method test.
@Test
public void test() throws Exception {
  final Principal systemUser =
    new Principal(UserGroupInformation.getCurrentUser().getShortUserName(), Principal.PrincipalType.USER);
  // initial state: no privileges for system or admin users
  Predicate<EntityId> systemUserFilter = authorizationEnforcer.createFilter(systemUser);
  Predicate<EntityId> adminUserFilter = authorizationEnforcer.createFilter(ADMIN_USER);
  Assert.assertFalse(systemUserFilter.apply(instanceId));
  Assert.assertFalse(systemUserFilter.apply(NamespaceId.SYSTEM));
  Assert.assertFalse(adminUserFilter.apply(NamespaceId.DEFAULT));
  // privileges should be granted after running bootstrap
  authorizationBootstrapper.run();
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      Predicate<EntityId> systemUserFilter = authorizationEnforcer.createFilter(systemUser);
      Predicate<EntityId> adminUserFilter = authorizationEnforcer.createFilter(ADMIN_USER);
      return systemUserFilter.apply(instanceId) && systemUserFilter.apply(NamespaceId.SYSTEM)
        && adminUserFilter.apply(NamespaceId.DEFAULT);
    }
  }, 10, TimeUnit.SECONDS);
  txManager.startAndWait();
  datasetService.startAndWait();
  waitForService(Constants.Service.DATASET_MANAGER);
  defaultNamespaceEnsurer.startAndWait();
  systemArtifactLoader.startAndWait();
  waitForService(defaultNamespaceEnsurer);
  waitForService(systemArtifactLoader);
  // ensure that the default namespace was created, and that the system user has privileges to access it
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      try {
        return namespaceQueryAdmin.exists(NamespaceId.DEFAULT);
      } catch (Exception e) {
        return false;
      }
    }
  }, 10, TimeUnit.SECONDS);
  Assert.assertTrue(defaultNamespaceEnsurer.isRunning());
  // ensure that the system artifact was deployed, and that the system user has privileges to access it;
  // this will throw an ArtifactNotFoundException if the artifact was not deployed, and UnauthorizedException
  // if the user does not have required privileges
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      try {
        artifactRepository.getArtifact(SYSTEM_ARTIFACT.toId());
        return true;
      } catch (Exception e) {
        return false;
      }
    }
  }, 20, TimeUnit.SECONDS);
  Assert.assertTrue(systemArtifactLoader.isRunning());
  // ensure that system datasets can be created by the system user
  Dataset systemDataset = DatasetsUtil.getOrCreateDataset(
    dsFramework, NamespaceId.SYSTEM.dataset("system-dataset"), Table.class.getName(),
    DatasetProperties.EMPTY, Collections.<String, String>emptyMap());
  Assert.assertNotNull(systemDataset);
  // as part of bootstrapping, admin users were also granted admin privileges on the CDAP instance,
  // so they can create namespaces
  SecurityRequestContext.setUserId(ADMIN_USER.getName());
  namespaceAdmin.create(new NamespaceMeta.Builder().setName("success").build());
  SecurityRequestContext.setUserId("bob");
  try {
    namespaceAdmin.create(new NamespaceMeta.Builder().setName("failure").build());
    Assert.fail("Bob should not have been able to create a namespace since he is not an admin user");
  } catch (UnauthorizedException expected) {
    // expected
  }
}
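The waitForService helper called above is not part of this snippet. A plausible sketch of its Service overload, following the same Tasks.waitFor pattern as the rest of the test (the actual helper may differ, and the String overload presumably checks service discovery instead):

// Assumed helper, not shown in the original test: wait until a Guava Service reports running.
private void waitForService(final Service service) throws Exception {
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return service.isRunning();
    }
  }, 10, TimeUnit.SECONDS);
}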
Use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.
The class DatasetSerDe, method getDatasetSchema.
private void getDatasetSchema(Configuration conf, DatasetId datasetId) throws SerDeException {
  try (ContextManager.Context hiveContext = ContextManager.getContext(conf)) {
    // the context can be null when Hive calls initialize just to get the object inspector
    if (hiveContext == null) {
      LOG.info("Hive provided a null conf, will not be able to get dataset schema.");
      return;
    }
    // some datasets like Table and ObjectMappedTable have schema in the dataset properties
    try {
      DatasetSpecification datasetSpec = hiveContext.getDatasetSpec(datasetId);
      String schemaStr = datasetSpec.getProperty("schema");
      if (schemaStr != null) {
        schema = Schema.parseJson(schemaStr);
        return;
      }
    } catch (DatasetManagementException | ServiceUnavailableException e) {
      throw new SerDeException("Could not instantiate dataset " + datasetId, e);
    } catch (IOException e) {
      throw new SerDeException("Exception getting schema for dataset " + datasetId, e);
    }
    // other datasets must be instantiated to get their schema
    // conf is null if this is a query that writes to a dataset
    ClassLoader parentClassLoader = conf == null ? null : conf.getClassLoader();
    try (SystemDatasetInstantiator datasetInstantiator = hiveContext.createDatasetInstantiator(parentClassLoader)) {
      Dataset dataset = datasetInstantiator.getDataset(datasetId);
      if (dataset == null) {
        throw new SerDeException("Could not find dataset " + datasetId);
      }
      Type recordType;
      if (dataset instanceof RecordScannable) {
        recordType = ((RecordScannable) dataset).getRecordType();
      } else if (dataset instanceof RecordWritable) {
        recordType = ((RecordWritable) dataset).getRecordType();
      } else {
        throw new SerDeException("Dataset " + datasetId + " is not explorable.");
      }
      schema = schemaGenerator.generate(recordType);
    } catch (UnsupportedTypeException e) {
      throw new SerDeException("Dataset " + datasetId + " has an unsupported schema.", e);
    } catch (IOException e) {
      throw new SerDeException("Exception while trying to instantiate dataset " + datasetId, e);
    }
  } catch (IOException e) {
    throw new SerDeException("Could not get hive context from configuration.", e);
  }
}
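To make the second code path concrete: for a RecordScannable dataset whose getRecordType() returns a simple POJO, the schema generator derives a record schema from the fields by reflection. An illustrative, made-up example (the exact output, e.g. nullability of non-primitive fields, depends on the generator):

// Hypothetical record type returned by RecordScannable.getRecordType().
public class Purchase {
  private String customer;
  private int quantity;
  private long purchaseTime;
}
// schemaGenerator.generate(Purchase.class) would then produce a schema roughly like:
// {"type":"record","name":"Purchase","fields":[
//   {"name":"customer","type":"string"},
//   {"name":"quantity","type":"int"},
//   {"name":"purchaseTime","type":"long"}]}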
Use of co.cask.cdap.api.dataset.Dataset in project cdap by caskdata.
The class ExploreTableManager, method updateDataset.
/**
 * Update ad-hoc exploration on the given dataset by altering the corresponding Hive table. If exploration has
 * not been enabled on the dataset, this will fail. Assumes the dataset actually exists.
 *
 * @param datasetId the ID of the dataset to enable
 * @param spec the specification for the dataset to enable
 * @param oldSpec the previous specification of the dataset
 * @return query handle for creating the Hive table for the dataset
 * @throws IllegalArgumentException if some required dataset property like schema is not set
 * @throws UnsupportedTypeException if the schema of the dataset is not compatible with Hive
 * @throws ExploreException if there was an exception submitting the create table statement
 * @throws SQLException if there was a problem with the create table statement
 * @throws DatasetNotFoundException if the dataset had to be instantiated, but could not be found
 * @throws ClassNotFoundException if there was a missing class when instantiating the dataset
 */
public QueryHandle updateDataset(DatasetId datasetId, DatasetSpecification spec, DatasetSpecification oldSpec)
  throws IllegalArgumentException, ExploreException, SQLException, UnsupportedTypeException,
  DatasetNotFoundException, ClassNotFoundException {
  String tableName = tableNaming.getTableName(datasetId, spec.getProperties());
  String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
  String oldTableName = tableNaming.getTableName(datasetId, oldSpec.getProperties());
  String oldDatabaseName = ExploreProperties.getExploreDatabaseName(oldSpec.getProperties());
  try {
    exploreService.getTableInfo(datasetId.getNamespace(), oldDatabaseName, oldTableName);
  } catch (TableNotFoundException e) {
    // the dataset was not explorable with the old spec, but the new spec may be explorable,
    // so attempt to enable it
    return enableDataset(datasetId, spec, false);
  }
  List<String> alterStatements;
  if (!(oldTableName.equals(tableName) && Objects.equals(oldDatabaseName, databaseName))) {
    alterStatements = new ArrayList<>();
    // database/table name changed. All we can do is disable the old table and enable the new one
    String disableStatement = generateDisableStatement(datasetId, oldSpec);
    if (disableStatement != null) {
      alterStatements.add(disableStatement);
    }
    String enableStatement = generateEnableStatement(datasetId, spec, false);
    if (enableStatement != null) {
      alterStatements.add(enableStatement);
    }
  } else {
    Dataset dataset = null;
    try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
      dataset = datasetInstantiator.getDataset(datasetId);
      alterStatements = generateAlterStatements(datasetId, tableName, dataset, spec, oldSpec);
    } catch (IOException e) {
      LOG.error("Exception instantiating dataset {}.", datasetId, e);
      throw new ExploreException("Exception while trying to instantiate dataset " + datasetId);
    } finally {
      Closeables.closeQuietly(dataset);
    }
  }
  LOG.trace("alter statements for update: {}", alterStatements);
  if (alterStatements == null || alterStatements.isEmpty()) {
    return QueryHandle.NO_OP;
  }
  if (alterStatements.size() == 1) {
    return exploreService.execute(datasetId.getParent(), alterStatements.get(0));
  }
  return exploreService.execute(datasetId.getParent(), alterStatements.toArray(new String[alterStatements.size()]));
}