use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.
the class TableTest method testEmptyGet.
@Test
public void testEmptyGet() throws Exception {
DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE);
admin.create();
try (Table myTable = getTable(CONTEXT1, MY_TABLE)) {
Transaction tx = txClient.startShort();
((TransactionAware) myTable).startTx(tx);
myTable.put(R1, C1, V1);
myTable.put(R1, C2, V2);
// to be used for validation later
TreeMap<byte[], byte[]> expectedColumns = new TreeMap<>(Bytes.BYTES_COMPARATOR);
expectedColumns.put(C1, V1);
expectedColumns.put(C2, V2);
Result expectedResult = new Result(R1, expectedColumns);
Result emptyResult = new Result(R1, ImmutableMap.<byte[], byte[]>of());
((TransactionAware) myTable).commitTx();
txClient.commitOrThrow(tx);
// start another transaction, so that the buffering table doesn't cache the values; the underlying Table
// implementations are tested this way.
tx = txClient.startShort();
((TransactionAware) myTable).startTx(tx);
Row row = myTable.get(R1, new byte[][] { C1, C2 });
assertEquals(expectedResult, row);
// passing in empty columns returns empty result
row = myTable.get(R1, new byte[][] {});
assertEquals(emptyResult, row);
// test all the Get constructors and their behavior
// constructors specifying only rowkey retrieve all columns
Get get = new Get(R1);
Assert.assertNull(get.getColumns());
assertEquals(expectedResult, myTable.get(get));
get = new Get(Bytes.toString(R1));
Assert.assertNull(get.getColumns());
assertEquals(expectedResult, myTable.get(get));
get.add(C1);
get.add(Bytes.toString(C2));
assertEquals(expectedResult, myTable.get(get));
// constructor specifying columns, but with an empty array/collection retrieve 0 columns
get = new Get(R1, new byte[][] {});
Assert.assertNotNull(get.getColumns());
assertEquals(emptyResult, myTable.get(get));
get = new Get(R1, ImmutableList.<byte[]>of());
Assert.assertNotNull(get.getColumns());
assertEquals(emptyResult, myTable.get(get));
get = new Get(Bytes.toString(R1), new String[] {});
Assert.assertNotNull(get.getColumns());
assertEquals(emptyResult, myTable.get(get));
get = new Get(Bytes.toString(R1), ImmutableList.<String>of());
Assert.assertNotNull(get.getColumns());
assertEquals(emptyResult, myTable.get(get));
row = myTable.get(R1, new byte[][] {});
assertEquals(emptyResult, row);
txClient.abort(tx);
} finally {
admin.drop();
}
}
use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.
the class TableTest method testMultiGetWithTx.
@Test
public void testMultiGetWithTx() throws Exception {
String testMultiGet = "testMultiGet";
DatasetAdmin admin = getTableAdmin(CONTEXT1, testMultiGet);
admin.create();
try (Table table = getTable(CONTEXT1, testMultiGet)) {
Transaction tx = txClient.startShort();
((TransactionAware) table).startTx(tx);
for (int i = 0; i < 100; i++) {
table.put(new Put(Bytes.toBytes("r" + i)).add(C1, V1).add(C2, V2));
}
txClient.canCommitOrThrow(tx, ((TransactionAware) table).getTxChanges());
Assert.assertTrue(((TransactionAware) table).commitTx());
txClient.commitOrThrow(tx);
Transaction tx2 = txClient.startShort();
((TransactionAware) table).startTx(tx2);
List<Get> gets = Lists.newArrayListWithCapacity(100);
for (int i = 0; i < 100; i++) {
gets.add(new Get(Bytes.toBytes("r" + i)));
}
List<Row> results = table.get(gets);
txClient.commitOrThrow(tx2);
for (int i = 0; i < 100; i++) {
Row row = results.get(i);
Assert.assertArrayEquals(Bytes.toBytes("r" + i), row.getRow());
byte[] val = row.get(C1);
Assert.assertNotNull(val);
Assert.assertArrayEquals(V1, val);
byte[] val2 = row.get(C2);
Assert.assertNotNull(val2);
Assert.assertArrayEquals(V2, val2);
}
Transaction tx3 = txClient.startShort();
((TransactionAware) table).startTx(tx3);
gets = Lists.newArrayListWithCapacity(100);
for (int i = 0; i < 100; i++) {
gets.add(new Get("r" + i).add(C1));
}
results = table.get(gets);
txClient.commitOrThrow(tx3);
for (int i = 0; i < 100; i++) {
Row row = results.get(i);
Assert.assertArrayEquals(Bytes.toBytes("r" + i), row.getRow());
byte[] val = row.get(C1);
Assert.assertNotNull(val);
Assert.assertArrayEquals(V1, val);
// should have only returned column 1
byte[] val2 = row.get(C2);
Assert.assertNull(val2);
}
// retrieve different columns per row
Transaction tx4 = txClient.startShort();
((TransactionAware) table).startTx(tx4);
gets = Lists.newArrayListWithCapacity(100);
for (int i = 0; i < 100; i++) {
Get get = new Get("r" + i);
// evens get C1, odds get C2
get.add(i % 2 == 0 ? C1 : C2);
gets.add(get);
}
results = table.get(gets);
txClient.commitOrThrow(tx4);
for (int i = 0; i < 100; i++) {
Row row = results.get(i);
Assert.assertArrayEquals(Bytes.toBytes("r" + i), row.getRow());
byte[] val1 = row.get(C1);
byte[] val2 = row.get(C2);
if (i % 2 == 0) {
Assert.assertNotNull(val1);
Assert.assertArrayEquals(V1, val1);
Assert.assertNull(val2);
} else {
Assert.assertNull(val1);
Assert.assertNotNull(val2);
Assert.assertArrayEquals(V2, val2);
}
}
} finally {
admin.drop();
}
}
use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.
the class BufferingTableTest method verify.
private void verify(BufferingTable table, byte[] row, byte[] col, byte[] val) throws Exception {
// get column
Assert.assertArrayEquals(val, table.get(row, col));
// get set of columns
Row getColSetRow = table.get(row, new byte[][] { col });
Map<byte[], byte[]> getColSetResult = getColSetRow.getColumns();
Assert.assertEquals(1, getColSetResult.size());
Assert.assertArrayEquals(val, getColSetResult.get(col));
// get row
Row getRow = table.get(row);
Map<byte[], byte[]> getRowResult = getRow.getColumns();
Assert.assertEquals(1, getRowResult.size());
Assert.assertArrayEquals(val, getRowResult.get(col));
// scan
Scanner scan = table.scan(row, null);
Row next = scan.next();
Assert.assertNotNull(next);
Assert.assertArrayEquals(row, next.getRow());
Assert.assertArrayEquals(val, next.get(col));
Assert.assertNull(scan.next());
}
use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.
the class MapReduceWithPartitionedTest method testPartitionedFileSetWithMR.
private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
// write a value to the input table
final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
}
});
// a partition key for the map/reduce output
final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
// run the partition writer m/r with this output partition time
Map<String, String> runtimeArguments = Maps.newHashMap();
Map<String, String> outputArgs = Maps.newHashMap();
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
// this should have created a partition in the tpfs
final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Partition partition = dataset.getPartition(keyX);
Assert.assertNotNull(partition);
String path = partition.getRelativePath();
Assert.assertTrue(path.contains("x"));
Assert.assertTrue(path.contains("150000"));
}
});
// delete the data in the input table and write a new row
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
table.delete(Bytes.toBytes("x"));
table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
}
});
// a new partition key for the next map/reduce
final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
// now run the m/r again with a new partition time, say 5 minutes later
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
// this should have created a partition in the tpfs
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Partition partition = dataset.getPartition(keyY);
Assert.assertNotNull(partition);
String path = partition.getRelativePath();
Assert.assertNotNull(path);
Assert.assertTrue(path.contains("y"));
Assert.assertTrue(path.contains("200000"));
}
});
// a partition filter that matches the outputs of both map/reduces
PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
// now run a map/reduce that reads all the partitions
runtimeArguments = Maps.newHashMap();
Map<String, String> inputArgs = Maps.newHashMap();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
// this should have read both partitions - and written both x and y to row a
final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Row row = output.get(Bytes.toBytes("a"));
Assert.assertEquals("1", row.getString("x"));
Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
Assert.assertEquals("2", row.getString("y"));
Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
}
});
// a partition filter that matches the output key of the first map/reduce
PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
// now run a map/reduce that reads a range of the partitions, namely the first one
inputArgs.clear();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
// this should have read the first partition only - and written only x to row b
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Row row = output.get(Bytes.toBytes("b"));
Assert.assertEquals("1", row.getString("x"));
Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
Assert.assertNull(row.get("y"));
Assert.assertNull(row.get("y_key"));
}
});
// a partition filter that matches no key
PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
// now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
inputArgs.clear();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
// this should have read no partitions - and written nothing to row n
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Row row = output.get(Bytes.toBytes("n"));
Assert.assertTrue(row.isEmpty());
}
});
}
use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.
the class MetadataDataset method getMetadata.
/**
* Retrieves the metadata for the specified {@link MetadataEntity}.
*
* @param metadataEntity the specified {@link MetadataEntity}
* @return {@link Record} representing the metadata for the specified {@link MetadataEntity}
*/
public Record getMetadata(MetadataEntity metadataEntity) {
MDSKey mdsKey = MetadataKey.createValueRowKey(metadataEntity, null);
byte[] startKey = mdsKey.getKey();
byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
Map<String, String> metadata = new HashMap<>();
try (Scanner scan = indexedTable.scan(startKey, stopKey)) {
Row next;
while ((next = scan.next()) != null) {
String key = MetadataKey.extractMetadataKey(next.getRow());
byte[] value = next.get(VALUE_COLUMN);
if (key == null || value == null) {
continue;
}
metadata.put(key, Bytes.toString(value));
}
}
Set<String> tags = splitTags(metadata.get(MetadataConstants.TAGS_KEY));
// safe to remove since splitTags above copies to new HashSet
// now we are only left with properties
metadata.remove(MetadataConstants.TAGS_KEY);
return new Record(metadataEntity, metadata, tags);
}
Aggregations