Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
Class HBaseMetricsTable, method increment.
/**
 * Applies a batch of increments to the table.
 *
 * <p>Each row key in {@code updates} is passed through {@code createDistributedRowKey} and turned
 * into an incremental {@code Put}; all puts are then sent through the mutator and flushed.
 *
 * @param updates increments to apply, keyed by row and then by column
 * @throws NumberFormatException if HBase reports that an existing cell is not 64 bits wide, i.e. it
 *     cannot be interpreted as a long; the originating {@link IOException} is attached as the cause
 * @throws DataSetException if the mutation fails for any other I/O reason
 */
@Override
public void increment(NavigableMap<byte[], NavigableMap<byte[], Long>> updates) {
  List<Put> puts = Lists.newArrayList();
  for (Map.Entry<byte[], NavigableMap<byte[], Long>> row : updates.entrySet()) {
    byte[] distributedKey = createDistributedRowKey(row.getKey());
    puts.add(getIncrementalPut(distributedKey, row.getValue()));
  }
  try {
    mutator.mutate(puts);
    mutator.flush();
  } catch (IOException e) {
    // Currently there is no other way to detect this condition than matching on the HBase message.
    String message = e.getMessage();
    if (message != null && message.contains("isn't 64 bits wide")) {
      NumberFormatException notALong =
          new NumberFormatException("Attempted to increment a value that is not convertible to long.");
      // NumberFormatException has no (message, cause) constructor, so attach the cause explicitly
      // to keep the underlying HBase failure visible in stack traces.
      notALong.initCause(e);
      throw notALong;
    }
    throw new DataSetException("Increment failed on table " + tableId, e);
  }
}
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
Class BufferingTable, method get.
/**
 * Reads the requested columns of a single row.
 *
 * @param row key of the row to read
 * @param columns columns to fetch, passed through to {@code getRowMap}
 * @return a {@code Result} wrapping the row key and the values returned by {@code getRowMap}
 * @throws DataSetException if reading the row fails for any reason; the failure is logged at debug
 *     level and attached as the cause
 */
@ReadOnly
@Override
public Row get(byte[] row, byte[][] columns) {
  ensureTransactionIsStarted();
  reportRead(1);
  try {
    return new Result(row, getRowMap(row, columns));
  } catch (Exception e) {
    // Log the table and row at debug level; callers only see the generic "get failed" message.
    String tableName = getTransactionAwareName();
    String printableRow = Bytes.toStringBinary(row);
    String details = "get failed for table: " + tableName + ", row: " + printableRow;
    LOG.debug(details, e);
    throw new DataSetException("get failed", e);
  }
}
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
Class LevelDBTable, method scanPersisted.
/**
 * Scans the persisted data between the start and stop rows of the given scan.
 *
 * @param scan describes the row range and an optional filter
 * @return a scanner that delegates to the underlying core scanner
 * @throws DataSetException if the scan carries a filter that is not a {@code FuzzyRowFilter}
 * @throws Exception if the underlying scan cannot be created
 */
@ReadOnly
@Override
protected Scanner scanPersisted(Scan scan) throws Exception {
  // todo: currently we support only FuzzyRowFilter as an experimental feature
  Object requestedFilter = scan.getFilter();
  if (requestedFilter != null && !(requestedFilter instanceof FuzzyRowFilter)) {
    throw new DataSetException("Unknown filter type: " + requestedFilter);
  }
  // Either null (no filtering) or a FuzzyRowFilter at this point.
  FuzzyRowFilter fuzzyFilter = (FuzzyRowFilter) requestedFilter;

  final Scanner delegate = core.scan(scan.getStartRow(), scan.getStopRow(), fuzzyFilter, null, tx);
  return new Scanner() {
    @Nullable
    @Override
    public Row next() {
      return LevelDBTable.this.next(delegate);
    }

    @Override
    public void close() {
      delegate.close();
    }
  };
}
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
Class HiveExploreServiceFileSetTestRun, method testPartitionedFileSet.
/**
 * End-to-end check of a partitioned file set with explore enabled: creates the dataset, adds and
 * drops partitions (directly, inside failing transactions, and from Hive), updates the explore
 * schema, toggles explore off and on, and finally deletes the dataset, verifying the Hive side
 * after each step.
 *
 * @param dbName explore database to use; when non-null the database is created and used to qualify
 *     the table name in queries, when null no database is set on the dataset properties
 * @param tableName explore table name to use; when null the name returned by
 *     {@code getDatasetHiveName} for the dataset is used
 * @throws Exception if any dataset, file or explore operation fails
 */
private void testPartitionedFileSet(@Nullable String dbName, @Nullable String tableName) throws Exception {
DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parted");
String hiveTableName = getDatasetHiveName(datasetInstanceId);
String showTablesCommand = "show tables";
// partitioned by a string field "str" and an int field "num"; explore is enabled with Avro formats
FileSetProperties.Builder props = PartitionedFileSetProperties.builder().setPartitioning(Partitioning.builder().addStringField("str").addIntField("num").build()).setBasePath("parted").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString());
// an explicit table name overrides the name derived from the dataset id
if (tableName != null) {
props.setExploreTableName(tableName);
hiveTableName = tableName;
}
// queryTableName is the (optionally database-qualified) name used in SQL statements
String queryTableName = hiveTableName;
if (dbName != null) {
props.setExploreDatabaseName(dbName);
runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
showTablesCommand += " in " + dbName;
queryTableName = dbName + "." + queryTableName;
}
// create a partitioned file set
datasetFramework.addInstance("partitionedFileSet", datasetInstanceId, props.build());
// verify that the hive table was created for this file set
runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
// Accessing dataset instance to perform data operations
final PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
Assert.assertNotNull(partitioned);
FileSet fileSet = partitioned.getEmbeddedFileSet();
// add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
Location locationX1 = fileSet.getLocation("fileX1/nn");
Location locationY1 = fileSet.getLocation("fileY1/nn");
Location locationX2 = fileSet.getLocation("fileX2/nn");
Location locationY2 = fileSet.getLocation("fileY2/nn");
FileWriterHelper.generateAvroFile(locationX1.getOutputStream(), "x", 1, 2);
FileWriterHelper.generateAvroFile(locationY1.getOutputStream(), "y", 1, 2);
FileWriterHelper.generateAvroFile(locationX2.getOutputStream(), "x", 2, 3);
FileWriterHelper.generateAvroFile(locationY2.getOutputStream(), "y", 2, 3);
// one partition key per (str, num) combination: (x,1), (y,1), (x,2), (y,2)
final PartitionKey keyX1 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 1).build();
PartitionKey keyY1 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 1).build();
final PartitionKey keyX2 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 2).build();
PartitionKey keyY2 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 2).build();
addPartition(partitioned, keyX1, "fileX1");
addPartition(partitioned, keyY1, "fileY1");
addPartition(partitioned, keyX2, "fileX2");
addPartition(partitioned, keyY2, "fileY2");
// verify that the partitions were added to Hive
validatePartitions(queryTableName, partitioned, ImmutableList.of(keyX1, keyX2, keyY1, keyY2));
// verify that count() and where... work in Hive
runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(4L))));
runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " WHERE num = 2 ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null), new ColumnDesc(hiveTableName + ".value", "STRING", 2, null), new ColumnDesc(hiveTableName + ".str", "STRING", 3, null), new ColumnDesc(hiveTableName + ".num", "INT", 4, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x2", "#2", "x", 2)), new QueryResult(Lists.<Object>newArrayList("y2", "#2", "y", 2))));
// drop a partition and query again
dropPartition(partitioned, keyX2);
validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
// attempt a transaction that drops one partition, adds another, and then fails
try {
doTransaction(partitioned, new Runnable() {
@Override
public void run() {
partitioned.dropPartition(keyX1);
partitioned.addPartition(keyX2, "fileX2");
// deliberately abort the transaction after both changes were made
Assert.fail("fail tx");
}
});
} catch (TransactionFailureException e) {
// expected
// NOTE(review): nothing fails the test if doTransaction returns normally here; the
// validatePartitions call below is what catches a transaction that was not rolled back
}
// validate that both the drop and addPartition were undone
validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
// attempt a transaction that attempts to add an existing partition, hence fails
try {
doTransaction(partitioned, new Runnable() {
@Override
public void run() {
partitioned.addPartition(keyX1, "fileX1");
// only reached if adding the existing partition did not throw
throw new RuntimeException("on purpose");
}
});
} catch (TransactionFailureException e) {
// expected: the cause must be the DataSetException from addPartition, not the "on purpose"
// RuntimeException, which would mean the duplicate add was wrongly accepted
Assert.assertTrue(e.getCause() instanceof DataSetException);
}
// validate that the failed addPartition left the set of partitions unchanged
validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
// drop a partition directly from hive
runCommand(NAMESPACE_ID, "ALTER TABLE " + queryTableName + " DROP PARTITION (str='y', num=2)", false, null, null);
// verify that one more value is gone now, namely y2, in Hive, but the PFS still has it
validatePartitionsInHive(queryTableName, ImmutableSet.of(keyX1, keyY1));
validatePartitionsInPFS(partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
// make sure the partition can still be dropped from the PFS dataset
dropPartition(partitioned, keyY2);
validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1));
// change the explore schema by updating the props
datasetFramework.updateInstance(datasetInstanceId, props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
// validate the schema was updated
// NOTE(review): the trailing 'true' presumably selects validation against the updated schema —
// confirm against validatePartitions
validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1), true);
// disable explore by updating the props
datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
// verify the Hive table is gone
runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
// re-enable explore by updating the props
datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
// verify the Hive table is back
runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
// drop the dataset
datasetFramework.deleteInstance(datasetInstanceId);
// verify the Hive table is gone
// NOTE(review): this uses the plain "show tables" rather than showTablesCommand, so when dbName is
// set it does not include the " in <dbName>" suffix used by the earlier checks — confirm this is intended
runCommand(NAMESPACE_ID, "show tables", false, null, Collections.<QueryResult>emptyList());
}
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
Class ApplicationVerificationStage, method verifyData.
/**
 * Verifies every dataset declared in the application specification.
 *
 * <p>Each dataset creation spec is run through its verifier. If a dataset with the same instance
 * name already exists, its type must match the declared type, and its owner is then checked with
 * {@code verifyOwner}.
 *
 * @param appId id of the application being deployed
 * @param specification application specification whose datasets are verified
 * @param ownerPrincipal owner principal of the application, may be null
 * @throws RuntimeException if a dataset creation spec fails verification
 * @throws DataSetException if an existing dataset with the same name has a different type
 * @throws Exception if looking up the existing dataset spec or verifying the owner fails
 */
private void verifyData(ApplicationId appId, ApplicationSpecification specification, @Nullable KerberosPrincipalId ownerPrincipal) throws Exception {
  // NOTE: no special restrictions on dataset module names, etc
  for (DatasetCreationSpec dataSetCreateSpec : specification.getDatasets().values()) {
    VerifyResult outcome = getVerifier(DatasetCreationSpec.class).verify(appId, dataSetCreateSpec);
    if (!outcome.isSuccess()) {
      throw new RuntimeException(outcome.getMessage());
    }

    String dsName = dataSetCreateSpec.getInstanceName();
    final DatasetId datasetInstanceId = appId.getParent().dataset(dsName);

    // look up any existing dataset spec as the authorizing user
    String authorizingUser =
        AuthorizationUtil.getAppAuthorizingUser(ownerAdmin, authenticationContext, appId, ownerPrincipal);
    DatasetSpecification existingSpec =
        AuthorizationUtil.authorizeAs(authorizingUser, new Callable<DatasetSpecification>() {
          @Override
          public DatasetSpecification call() throws Exception {
            return dsFramework.getDatasetSpec(datasetInstanceId);
          }
        });

    // nothing more to check for a dataset that does not exist yet
    if (existingSpec == null) {
      continue;
    }

    // New app trying to deploy a dataset with the same instance name but a different type than the existing one.
    if (!existingSpec.getType().equals(dataSetCreateSpec.getTypeName())) {
      throw new DataSetException(String.format("Cannot Deploy Dataset : %s with Type : %s : Dataset with different Type Already Exists", dsName, dataSetCreateSpec.getTypeName()));
    }

    // the dataset already exists with a matching type: verify its owner is the same
    verifyOwner(datasetInstanceId, ownerPrincipal);
  }
}
Aggregations