use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by cdapio.
the class TableTest method testBasicIncrementWriteWithTxSmall.
private void testBasicIncrementWriteWithTxSmall(boolean readless) throws Exception {
  DatasetProperties props = TableProperties.builder().setReadlessIncrementSupport(readless).build();
  DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE, props);
  admin.create();
  try (Table myTable = getTable(CONTEXT1, MY_TABLE, props)) {
    // start 1st tx
    Transaction tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    myTable.increment(R1, a(C1), la(-3L));
    // we'll use this one to test that we can delete an increment and increment again
    myTable.increment(R2, a(C2), la(5L));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    // start 2nd tx
    tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    Assert.assertArrayEquals(Bytes.toBytes(-3L), myTable.get(R1, C1));
    myTable.increment(R1, a(C1), la(-3L));
    Assert.assertArrayEquals(Bytes.toBytes(-6L), myTable.get(R1, C1));
    Assert.assertArrayEquals(Bytes.toBytes(5L), myTable.get(R2, C2));
    myTable.delete(R2, C2);
    Assert.assertArrayEquals(null, myTable.get(R2, C2));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    // start 3rd tx
    tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    Assert.assertArrayEquals(Bytes.toBytes(-6L), myTable.get(R1, C1));
    Assert.assertArrayEquals(null, myTable.get(R2, C2));
    myTable.increment(R2, a(C2), la(7L));
    Assert.assertArrayEquals(Bytes.toBytes(7L), myTable.get(R2, C2));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    // start 4th tx
    tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    Assert.assertArrayEquals(Bytes.toBytes(7L), myTable.get(R2, C2));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
  } finally {
    admin.drop();
  }
}
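The commitAndAssertSuccess helper used above is not part of this excerpt. A minimal sketch of what such a helper could look like, assuming the standard Tephra two-phase commit flow (canCommit, commitTx, commit, postTxCommit); the actual helper in TableTest may differ:

  // Hypothetical sketch of commitAndAssertSuccess (assumption, not the original helper):
  // drive the Tephra two-phase commit and assert that every step succeeds.
  private void commitAndAssertSuccess(Transaction tx, TransactionAware txAware) throws Exception {
    // conflict detection against the change set collected by the table
    Assert.assertTrue(txClient.canCommit(tx, txAware.getTxChanges()));
    // persist the buffered writes
    Assert.assertTrue(txAware.commitTx());
    // make the transaction visible, then let the table clean up its transaction state
    Assert.assertTrue(txClient.commit(tx));
    txAware.postTxCommit();
  }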
use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by cdapio.
the class HiveExploreServiceFileSetTestRun method testPartitionedExisting.
private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
  final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
  final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
  File path = new File(tmpFolder.newFolder(), "base");
  String tableName = "reuse";
  // create a dummy PFS in order to create a table in Hive and add a partition
  // the file set is partitioned by a single int field "number"
  DatasetProperties props = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("number").build())
    .setBasePath(path.toString())
    .setEnableExploreOnCreate(true)
    .setExploreTableName(tableName)
    .setExploreSchema("key STRING, value INT")
    .setExploreFormat("csv")
    .build();
  datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, props);
  PartitionedFileSet dummy = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(dummy);
  Location location = dummy.getEmbeddedFileSet().getLocation("number1").append("file1");
  PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
  FileWriterHelper.generateTextFile(location.getOutputStream(), ",", "x", 1, 2);
  addPartition(dummy, key, "number1");
  // validate data
  List<ColumnDesc> expectedColumns = Lists.newArrayList(
    new ColumnDesc(tableName + ".key", "STRING", 1, null),
    new ColumnDesc(tableName + ".value", "INT", 2, null),
    new ColumnDesc(tableName + ".number", "INT", 3, null));
  runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
  // create a second PFS over the same Hive table, setting the given reuse property
  props = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("number").build())
    .setBasePath(path.toString())
    .setEnableExploreOnCreate(true)
    .setExploreTableName(tableName)
    .setExploreSchema("key STRING, value INT")
    .setExploreFormat("csv")
    .add(reuseProperty, "true")
    .build();
  datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, props);
  PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(partitioned);
  // update the second PFS with a different explore schema
  props = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("number").build())
    .setBasePath(path.toString())
    .setEnableExploreOnCreate(true)
    .setExploreTableName(tableName)
    .setExploreSchema("k STRING, v INT")
    .setExploreFormat("csv")
    .add(reuseProperty, "true")
    .build();
  datasetFramework.updateInstance(datasetInstanceId, props);
  // validate data
  if (!possessed) {
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
  } else {
    List<ColumnDesc> newExpectedColumns = Lists.newArrayList(
      new ColumnDesc(tableName + ".k", "STRING", 1, null),
      new ColumnDesc(tableName + ".v", "INT", 2, null),
      new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, newExpectedColumns, null);
  }
  datasetFramework.deleteInstance(datasetInstanceId);
  if (!possessed) {
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
  } else {
    runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
  }
  datasetFramework.deleteInstance(dummyInstanceId);
}
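The same property builder appears three times above, varying only in the explore schema and the optional reuse flag. A small illustrative helper could factor that out; this is a sketch only, the helper name and signature are hypothetical and not part of the original test:

  // Hypothetical helper (not in the original test): build the explore-enabled PFS properties
  // used above, varying only the schema and the optional reuse/possess property.
  private static DatasetProperties explorePfsProps(String tableName, String basePath,
                                                   String schema, @Nullable String reuseProperty) {
    FileSetProperties.Builder builder = PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addIntField("number").build())
      .setBasePath(basePath)
      .setEnableExploreOnCreate(true)
      .setExploreTableName(tableName)
      .setExploreSchema(schema)
      .setExploreFormat("csv");
    if (reuseProperty != null) {
      builder.add(reuseProperty, "true");
    }
    return builder.build();
  }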
use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by cdapio.
the class ExternalDatasets method registerLineage.
/**
 * Registers lineage for this Spark program using the given reference name.
 *
 * @param admin the admin used to create the reference dataset if it does not exist yet
 * @param referenceName the reference name used for the source
 * @param accessType the access type of the lineage
 * @param schema the schema of the external data, or null if no schema is known
 * @param datasetSupplier supplies the dataset on which the lineage is recorded
 * @throws DatasetManagementException if there was an error creating the reference dataset
 */
public static void registerLineage(Admin admin, String referenceName, AccessType accessType,
                                   @Nullable Schema schema, Supplier<Dataset> datasetSupplier)
  throws DatasetManagementException {
  DatasetProperties datasetProperties;
  if (schema == null) {
    datasetProperties = DatasetProperties.EMPTY;
  } else {
    datasetProperties = DatasetProperties.of(Collections.singletonMap(DatasetProperties.SCHEMA, schema.toString()));
  }
  try {
    if (!admin.datasetExists(referenceName)) {
      admin.createDataset(referenceName, EXTERNAL_DATASET_TYPE, datasetProperties);
    }
  } catch (InstanceConflictException ex) {
    // Might happen if this is executed in parallel across multiple pipeline runs.
  }
  // we cannot instantiate ExternalDataset here - it is in CDAP data-fabric,
  // and this code (the pipeline app) cannot depend on that. Thus, use reflection
  // to invoke a method on the dataset.
  Dataset ds = datasetSupplier.get();
  Class<? extends Dataset> dsClass = ds.getClass();
  switch (accessType) {
    case READ:
      invokeMethod(referenceName, ds, dsClass, "recordRead", accessType);
      break;
    case WRITE:
      invokeMethod(referenceName, ds, dsClass, "recordWrite", accessType);
      break;
    default:
      LOG.warn("Failed to register lineage because of unknown access type {}", accessType);
  }
}
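The invokeMethod helper referenced above is not shown in this excerpt. A minimal reflection-based sketch of what it might look like, assuming recordRead()/recordWrite() take no arguments; the real ExternalDatasets helper may handle errors and signatures differently:

  // Hypothetical sketch of invokeMethod (assumption, not the original helper).
  // Requires java.lang.reflect.Method and java.lang.reflect.InvocationTargetException.
  private static void invokeMethod(String referenceName, Dataset ds, Class<? extends Dataset> dsClass,
                                   String methodName, AccessType accessType) {
    try {
      Method method = dsClass.getMethod(methodName);
      method.invoke(ds);
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
      LOG.warn("Unable to record {} lineage for reference dataset {}", accessType, referenceName, e);
    }
  }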
use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by cdapio.
the class PartitionedFileSetDefinition method configure.
@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
  Partitioning partitioning = PartitionedFileSetProperties.getPartitioning(properties.getProperties());
  Preconditions.checkArgument(partitioning != null, "Properties do not contain partitioning");
  // define the columns for indexing on the partitionsTable
  DatasetProperties indexedTableProperties = DatasetProperties.builder()
    .addAll(properties.getProperties())
    .add(IndexedTable.INDEX_COLUMNS_CONF_KEY, INDEXED_COLS)
    .build();
  Map<String, String> pfsProperties = new HashMap<>(properties.getProperties());
  // this property allows us to distinguish between datasets that were created
  // before the base path was explicitly set and those created after.
  // this is important to know when a pfs is updated, as we want to keep the old base path behavior for
  // previously created datasets
  String defaultBasePathStr = properties.getProperties().get(NAME_AS_BASE_PATH_DEFAULT);
  boolean useNameAsBasePathDefault = defaultBasePathStr == null || Boolean.parseBoolean(defaultBasePathStr);
  DatasetProperties.Builder fileProperties = DatasetProperties.builder().addAll(properties.getProperties());
  // if no base path was explicitly given, default the embedded file set's base path to the dataset
  // instance name; when the dataset is deleted, only the 'files' dir will be deleted and not the
  // dataset name dir.
  if (useNameAsBasePathDefault && !properties.getProperties().containsKey(FileSetProperties.BASE_PATH)) {
    fileProperties.add(FileSetProperties.BASE_PATH, instanceName);
    pfsProperties.put(NAME_AS_BASE_PATH_DEFAULT, Boolean.TRUE.toString());
  }
  return DatasetSpecification.builder(instanceName, getName())
    .properties(pfsProperties)
    .datasets(filesetDef.configure(FILESET_NAME, fileProperties.build()),
              indexedTableDef.configure(PARTITION_TABLE_NAME, indexedTableProperties))
    .build();
}
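To make the base-path defaulting rule above easier to see in isolation, here is an illustrative extraction of the decision; the method name is hypothetical and this is not part of the original class:

  // Hypothetical extraction of the base-path defaulting decision above (illustrative only).
  static boolean shouldDefaultBasePathToName(Map<String, String> props) {
    // datasets created before NAME_AS_BASE_PATH_DEFAULT existed carry no flag and keep the defaulting behavior
    String flag = props.get(NAME_AS_BASE_PATH_DEFAULT);
    boolean useNameAsBasePathDefault = flag == null || Boolean.parseBoolean(flag);
    // only default when the user did not set an explicit base path
    return useNameAsBasePathDefault && !props.containsKey(FileSetProperties.BASE_PATH);
  }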
use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by cdapio.
the class HBaseTableTest method testEnableIncrements.
@Test
public void testEnableIncrements() throws Exception {
  // set up one table with readless increments disabled and one with them enabled
  String disableTableName = "incr-disable";
  String enabledTableName = "incr-enable";
  TableId disabledTableId = hBaseTableUtil.createHTableId(NAMESPACE1, disableTableName);
  TableId enabledTableId = hBaseTableUtil.createHTableId(NAMESPACE1, enabledTableName);
  DatasetProperties propsDisabled = TableProperties.builder()
    .setReadlessIncrementSupport(false)
    .setConflictDetection(ConflictDetection.COLUMN)
    .build();
  HBaseTableAdmin disabledAdmin = getTableAdmin(CONTEXT1, disableTableName, propsDisabled);
  disabledAdmin.create();
  HBaseAdmin admin = TEST_HBASE.getHBaseAdmin();
  DatasetProperties propsEnabled = TableProperties.builder()
    .setReadlessIncrementSupport(true)
    .setConflictDetection(ConflictDetection.COLUMN)
    .build();
  HBaseTableAdmin enabledAdmin = getTableAdmin(CONTEXT1, enabledTableName, propsEnabled);
  enabledAdmin.create();
  try {
    try {
      // the IncrementHandler coprocessor should only be attached to the readless-enabled table
      HTableDescriptor htd = hBaseTableUtil.getHTableDescriptor(admin, disabledTableId);
      List<String> cps = htd.getCoprocessors();
      assertFalse(cps.contains(IncrementHandler.class.getName()));
      htd = hBaseTableUtil.getHTableDescriptor(admin, enabledTableId);
      cps = htd.getCoprocessors();
      assertTrue(cps.contains(IncrementHandler.class.getName()));
    } finally {
      admin.close();
    }
    try (BufferingTable table = getTable(CONTEXT1, enabledTableName, propsEnabled)) {
      byte[] row = Bytes.toBytes("row1");
      byte[] col = Bytes.toBytes("col1");
      DetachedTxSystemClient txSystemClient = new DetachedTxSystemClient();
      Transaction tx = txSystemClient.startShort();
      table.startTx(tx);
      table.increment(row, col, 10);
      table.commitTx();
      // verify that the value was written as a delta value
      final byte[] expectedValue = Bytes.add(IncrementHandlerState.DELTA_MAGIC_PREFIX, Bytes.toBytes(10L));
      final AtomicBoolean foundValue = new AtomicBoolean();
      byte[] enabledTableNameBytes = hBaseTableUtil.getHTableDescriptor(admin, enabledTableId).getName();
      TEST_HBASE.forEachRegion(enabledTableNameBytes, new Function<HRegion, Object>() {
        @Override
        public Object apply(HRegion hRegion) {
          org.apache.hadoop.hbase.client.Scan scan = hBaseTableUtil.buildScan().build();
          try {
            RegionScanner scanner = hRegion.getScanner(scan);
            List<Cell> results = Lists.newArrayList();
            boolean hasMore;
            do {
              hasMore = scanner.next(results);
              for (Cell cell : results) {
                if (CellUtil.matchingValue(cell, expectedValue)) {
                  foundValue.set(true);
                }
              }
            } while (hasMore);
          } catch (IOException ioe) {
            fail("IOException scanning region: " + ioe.getMessage());
          }
          return null;
        }
      });
      assertTrue("Should have seen the expected encoded delta value in the " + enabledTableName + " table region",
                 foundValue.get());
    }
  } finally {
    disabledAdmin.drop();
    enabledAdmin.drop();
  }
}
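As a complementary check (not part of the original test), one could start another transaction and read the column back through the dataset API; with readless increments enabled, the read should return the merged total rather than the raw delta encoding seen in the region scan. A minimal sketch, assuming the same table, row, column, and transaction client as in the block above:

  // Hypothetical follow-up check (not in the original test): read back through the Table API.
  Transaction readTx = txSystemClient.startShort();
  table.startTx(readTx);
  // the stored cell is delta-encoded, but reads through the dataset API merge deltas into the total
  assertEquals(10L, Bytes.toLong(table.get(row, col)));
  table.commitTx();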