use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
the class BufferingTableTest method testMultiGetIncludesBuffer.
@Test
public void testMultiGetIncludesBuffer() throws Exception {
  DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE);
  admin.create();
  try {
    // persist some data
    BufferingTable table = getTable(CONTEXT1, MY_TABLE);
    Transaction tx1 = txClient.startShort();
    table.startTx(tx1);
    // write a couple of rows
    // the table should look like the following, with everything in the buffer:
    //      c1  c2  c3  c4
    // r1    1   2   3   -
    // r2    -   3   2   1
    table.put(R1, a(C1, C2, C3), lb(1, 2, 3));
    table.put(R2, a(C2, C3, C4), lb(3, 2, 1));
    // check that multi-get can see buffered writes
    List<Row> rows = table.get(Lists.newArrayList(new Get(R1), new Get(R2)));
    Assert.assertEquals(2, rows.size());
    TableAssert.assertRow(rows.get(0), R1, a(C1, C2, C3), lb(1, 2, 3));
    TableAssert.assertRow(rows.get(1), R2, a(C2, C3, C4), lb(3, 2, 1));
    // check multi-get with gets that specify columns, and one get that should return an empty row
    rows = table.get(Lists.newArrayList(new Get(R1, C2, C3), new Get(R2, C2, C3), new Get(R3)));
    Assert.assertEquals(3, rows.size());
    TableAssert.assertRow(rows.get(0), R1, a(C2, C3), lb(2, 3));
    TableAssert.assertRow(rows.get(1), R2, a(C2, C3), lb(3, 2));
    Assert.assertTrue(rows.get(2).isEmpty());
    // persist the changes
    Collection<byte[]> txChanges = table.getTxChanges();
    Assert.assertTrue(txClient.canCommit(tx1, txChanges));
    Assert.assertTrue(table.commitTx());
    Assert.assertTrue(txClient.commit(tx1));
    table.postTxCommit();
    // start another transaction
    Transaction tx2 = txClient.startShort();
    table.startTx(tx2);
    // now add another row, delete a row, and change some column values
    // the table should look like the following:
    //      c1  c2  c3  c4  c5
    // r1    -   -   3   2   -
    // r3    -   -   -   -   1
    table.put(R1, a(C2, C3, C4), lb(4, 3, 2));
    table.delete(R1, a(C1, C2));
    table.delete(R2);
    table.put(R3, C5, L1);
    // verify that multi-get sees the persisted data with the buffer applied on top
    rows = table.get(Lists.newArrayList(new Get(R1), new Get(R2), new Get(R3)));
    Assert.assertEquals(3, rows.size());
    TableAssert.assertRow(rows.get(0), R1, a(C3, C4), lb(3, 2));
    Assert.assertTrue(rows.get(1).isEmpty());
    TableAssert.assertRow(rows.get(2), R3, a(C5), lb(1));
    // pretend there was a write conflict and roll back the changes
    Assert.assertTrue(table.rollbackTx());
    txClient.abort(tx2);
    // start another transaction and make sure it can't see what was done before
    Transaction tx3 = txClient.startShort();
    table.startTx(tx3);
    rows = table.get(Lists.newArrayList(new Get(R1), new Get(R2)));
    Assert.assertEquals(2, rows.size());
    TableAssert.assertRow(rows.get(0), R1, a(C1, C2, C3), lb(1, 2, 3));
    TableAssert.assertRow(rows.get(1), R2, a(C2, C3, C4), lb(3, 2, 1));
  } finally {
    admin.drop();
  }
}
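The test relies on shorthand helpers a and lb that are defined in the test's superclass rather than shown here. A minimal sketch of what they plausibly do, assuming a simply packs column names into a byte[][] and lb encodes longs with Bytes.toBytes; the actual definitions may differ:

// Hypothetical reconstructions of the helpers used in the test above.
private static byte[][] a(byte[]... columns) {
  // varargs already arrive as a byte[][]
  return columns;
}

private static byte[][] lb(long... values) {
  // encode each long as its 8-byte representation, matching Bytes.toBytes(long)
  byte[][] encoded = new byte[values.length][];
  for (int i = 0; i < values.length; i++) {
    encoded[i] = Bytes.toBytes(values[i]);
  }
  return encoded;
}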
use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
the class DataQualityAppTest method testDefaultConfig.
@Test
public void testDefaultConfig() throws Exception {
  Map<String, Set<String>> testMap = new HashMap<>();
  Set<String> testSet = new HashSet<>();
  testSet.add("DiscreteValuesHistogram");
  testMap.put("content_length", testSet);
  DataQualityApp.DataQualityConfig config =
    new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", testMap);
  ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
  AppRequest<DataQualityApp.DataQualityConfig> appRequest =
    new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
  ApplicationManager applicationManager = deployApplication(appId, appRequest);
  MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
  Table logDataStore = (Table) getDataset("dataQuality").get();
  DiscreteValuesHistogram discreteValuesHistogramAggregationFunction = new DiscreteValuesHistogram();
  Row row;
  try (Scanner scanner = logDataStore.scan(null, null)) {
    while ((row = scanner.next()) != null) {
      if (Bytes.toString(row.getRow()).contains("content_length")) {
        Map<byte[], byte[]> columnsMapBytes = row.getColumns();
        byte[] output = columnsMapBytes.get(Bytes.toBytes("DiscreteValuesHistogram"));
        if (output != null) {
          discreteValuesHistogramAggregationFunction.combine(output);
        }
      }
    }
  }
  Map<String, Integer> outputMap = discreteValuesHistogramAggregationFunction.retrieveAggregation();
  Map<String, Integer> expectedMap = Maps.newHashMap();
  expectedMap.put("256", 3);
  Assert.assertEquals(expectedMap, outputMap);
}
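The scan-and-combine loop above is a common pattern for pulling one column out of every matching row of a Table. A generic sketch of that pattern, with a hypothetical helper name and an illustrative substring filter (not part of the app itself):

// Hypothetical helper: collect the value of one column from every row whose
// key contains the given substring, mirroring the loop in the test above.
private static List<byte[]> collectColumn(Table table, String keySubstring, byte[] column) {
  List<byte[]> values = new ArrayList<>();
  // a scan with null start and stop keys covers the whole table
  try (Scanner scanner = table.scan(null, null)) {
    Row row;
    while ((row = scanner.next()) != null) {
      if (Bytes.toString(row.getRow()).contains(keySubstring)) {
        byte[] value = row.get(column);  // shortcut for getColumns().get(column)
        if (value != null) {
          values.add(value);
        }
      }
    }
  }
  return values;
}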
use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
the class AssociationTable method readWordAssocs.
/**
 * Returns the top words associated with the specified word and the number
 * of times the words have appeared together.
 *
 * @param word the word of interest
 * @param limit the number of associations to return, at most
 * @return a map of the top associated words to their co-occurrence count
 */
@ReadOnly
public Map<String, Long> readWordAssocs(String word, int limit) {
  // retrieve all columns of the word's row
  Row result = this.table.get(new Get(word));
  TopKCollector collector = new TopKCollector(limit);
  if (!result.isEmpty()) {
    // iterate over all columns
    for (Map.Entry<byte[], byte[]> entry : result.getColumns().entrySet()) {
      collector.add(Bytes.toLong(entry.getValue()), Bytes.toString(entry.getKey()));
    }
  }
  return collector.getTopK();
}
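TopKCollector itself is a small accumulator that keeps only the limit highest-count entries; its implementation is not shown on this page. A minimal sketch that matches the add/getTopK usage above, with internals that are an assumption rather than the example app's actual code:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;

// Sketch of a top-K collector: a min-heap of (count, word) pairs whose
// smallest element is evicted once more than 'limit' entries are held.
public class TopKCollector {
  private final int limit;
  private final PriorityQueue<Map.Entry<Long, String>> heap =
    new PriorityQueue<>((a, b) -> Long.compare(a.getKey(), b.getKey()));

  public TopKCollector(int limit) {
    this.limit = limit;
  }

  public void add(long count, String word) {
    heap.offer(new java.util.AbstractMap.SimpleEntry<>(count, word));
    if (heap.size() > limit) {
      heap.poll();  // evict the entry with the smallest count
    }
  }

  public Map<String, Long> getTopK() {
    // sort the retained entries by descending count before returning them
    List<Map.Entry<Long, String>> entries = new ArrayList<>(heap);
    entries.sort((a, b) -> Long.compare(b.getKey(), a.getKey()));
    Map<String, Long> topK = new LinkedHashMap<>();
    for (Map.Entry<Long, String> entry : entries) {
      topK.put(entry.getValue(), entry.getKey());
    }
    return topK;
  }
}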
use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
the class IndexedTable method incrementAndGet.
/**
 * Increments (atomically) the specified row and columns by the specified amounts, and returns the new values.
 * Note that performing this operation on an indexed column will generally have a negative impact on performance,
 * since up to three writes will need to be performed for every increment (one removing the index for the previous,
 * pre-increment value, one adding the index for the incremented value, and one for the increment itself).
 *
 * @see Table#incrementAndGet(byte[], byte[][], long[])
 */
@ReadWrite
@Override
public Row incrementAndGet(byte[] row, byte[][] columns, long[] amounts) {
  if (columns.length != amounts.length) {
    throw new IllegalArgumentException("Size of columns and amounts arguments must match");
  }
  Row existingRow = table.get(row, columns);
  byte[][] updatedValues = new byte[columns.length][];
  NavigableMap<byte[], byte[]> result = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  for (int i = 0; i < columns.length; i++) {
    long existingValue = 0L;
    byte[] existingBytes = existingRow.get(columns[i]);
    if (existingBytes != null) {
      if (existingBytes.length != Bytes.SIZEOF_LONG) {
        throw new NumberFormatException("Attempted to increment a value that is not convertible to long, row: "
                                          + Bytes.toStringBinary(row) + " column: " + Bytes.toStringBinary(columns[i]));
      }
      existingValue = Bytes.toLong(existingBytes);
      if (indexedColumns.contains(columns[i])) {
        // remove the index entry for the previous, pre-increment value
        index.delete(createIndexKey(row, columns[i], existingBytes), IDX_COL);
      }
    }
    updatedValues[i] = Bytes.toBytes(existingValue + amounts[i]);
    result.put(columns[i], updatedValues[i]);
    if (indexedColumns.contains(columns[i])) {
      // add an index entry for the new, incremented value
      index.put(createIndexKey(row, columns[i], updatedValues[i]), IDX_COL, row);
    }
  }
  // write the incremented values to the data table
  table.put(row, columns, updatedValues);
  return new Result(row, result);
}
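A short usage sketch of incrementAndGet; the table handle, row key, and column names here are made up for illustration:

// Illustrative call: atomically bump two counters in one row and read back
// the new values (all names are hypothetical).
byte[] rowKey = Bytes.toBytes("user-42");
byte[][] counters = { Bytes.toBytes("logins"), Bytes.toBytes("purchases") };
long[] deltas = { 1L, 2L };
Row updated = indexedTable.incrementAndGet(rowKey, counters, deltas);
long logins = Bytes.toLong(updated.get(Bytes.toBytes("logins")));

If either counter column is among the indexed columns, this single call also rewrites the corresponding index entries, which is the write amplification the javadoc warns about.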
use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
the class IndexedTable method delete.
@WriteOnly
@Override
public void delete(byte[] row, byte[][] columns) {
  Row existingRow = table.get(row, columns);
  if (existingRow.isEmpty()) {
    // no row to delete
    return;
  }
  // delete all index entries
  deleteIndexEntries(existingRow);
  // delete the row's columns
  table.delete(row, columns);
}
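deleteIndexEntries is not shown on this page. A plausible reconstruction based on the index layout visible in incrementAndGet above (createIndexKey plus the IDX_COL marker column); the actual CDAP implementation may differ:

// Assumed shape of the helper: for each indexed column present in the row
// being deleted, remove the matching index entry.
private void deleteIndexEntries(Row existingRow) {
  byte[] row = existingRow.getRow();
  for (Map.Entry<byte[], byte[]> entry : existingRow.getColumns().entrySet()) {
    if (indexedColumns.contains(entry.getKey())) {
      index.delete(createIndexKey(row, entry.getKey(), entry.getValue()), IDX_COL);
    }
  }
}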